#%%
import numpy as np
import pandas as pd
from statistics import mean 
import matplotlib.pyplot as plt
import scipy.stats as stats
from matplotlib import pyplot as plt
from pingouin import anova, ancova, print_table
from scipy.stats.stats import pearsonr
from scipy.stats.stats import spearmanr
from scipy.stats import f_oneway
import random
from scipy.stats import chi2_contingency
from statsmodels.multivariate.manova import MANOVA
import semopy
from semopy import semplot
from semopy import Model
import seaborn as sns
import pingouin as pg
from pingouin import mixed_anova

#%% Load and name results spreadsheets
def _read_named(path, name):
    """Read one Excel export into a DataFrame and tag it with a short name.

    The tag is stored as a plain ``name`` attribute on the returned frame;
    later cells read ``df.name`` to decide how to label columns and files.
    """
    frame = pd.read_excel(path)
    frame.name = name
    return frame


# Organizer surveys (pre-conversation 1, then after conversations 1-3)
org_pre1 = _read_named('/Users/juliafine/learnpy/Conversation_1_preconversation_survey_for_current_organizers.xlsx', 'org_pre1')
org_1 = _read_named('/Users/juliafine/learnpy/Conversation_1_postconversation_survey_for_current_organizers.xlsx', 'org_1')
org_2 = _read_named('/Users/juliafine/learnpy/Conversation_2_postconversation_survey_for_current_organizers.xlsx', 'org_2')
org_3 = _read_named('/Users/juliafine/learnpy/Conversation_3_postconversation_survey_for_current_organizers.xlsx', 'org_3')

# Conversational-partner surveys, including the follow-up
par_pre1 = _read_named('/Users/juliafine/learnpy/Conversation_1_preconversation_survey_for_conversational_partners.xlsx', 'par_pre1')
par_1 = _read_named('/Users/juliafine/learnpy/Conversation_1_postconversation_survey_for_conversational_partners.xlsx', 'par_1')
par_2 = _read_named('/Users/juliafine/learnpy/Conversation_2_postconversation_survey_for_conversational_partners.xlsx', 'par_2')
par_3 = _read_named('/Users/juliafine/learnpy/Conversation_3_postconversation_survey_for_conversational_partners.xlsx', 'par_3')
par_followup = _read_named('/Users/juliafine/learnpy/Followup_for_conversational_partners.xlsx', 'par_followup')

# Control-group surveys, including the follow-up
ctrl_1 = _read_named('/Users/juliafine/learnpy/Survey_1.xlsx', 'ctrl_1')
ctrl_2 = _read_named('/Users/juliafine/learnpy/Survey_2.xlsx', 'ctrl_2')
ctrl_3 = _read_named('/Users/juliafine/learnpy/Survey_3.xlsx', 'ctrl_3')
ctrl_followup = _read_named('/Users/juliafine/learnpy/Followup.xlsx', 'ctrl_followup')

# Separate control survey whose respondents did not supply IDs
ctrl_new = _read_named('/Users/juliafine/learnpy/Climate_Attitudes_and_Actions_Among_Friends_and_Family_of_Climate_Activists.xlsx', 'ctrl_new')

# Coded discourse themes
discourse = _read_named('/Users/juliafine/learnpy/Discourse themes.xlsx', 'discourse')

# Participant rosters: these two frames are not given a name attribute
participant_list = pd.read_excel('/Users/juliafine/learnpy/Phase 2 participants.xlsx')
control_list = pd.read_excel('/Users/juliafine/learnpy/Phase 2 participants.xlsx', sheet_name = 'Controls')

#%% Set Likert value
# Printed as the "out of" figure when the average ratings are reported
likert = 4
#%% Make file lists
# The three per-group lists are the building blocks; the combined lists keep
# the order organizers, partners, controls.
organizer_files = [org_pre1, org_1, org_2, org_3]
partner_files = [par_pre1, par_1, par_2, par_3, par_followup]
control_files = [ctrl_1, ctrl_2, ctrl_3, ctrl_followup]
result_files = organizer_files + partner_files + control_files
result_files_with_new_ctrl = result_files + [ctrl_new]
all_dfs = result_files + [discourse]
# Frames that the demographic cells below iterate over
demo = [org_pre1, par_pre1, ctrl_1, ctrl_new]
#%% Convert IDs to strings

#Define a function to format IDs
def format_id(num):
    """Return *num* as a 4-character ID string.

    The value is converted with ``str``, everything from the first "." on is
    dropped (so 123.0 becomes "123"), the text is left-padded with "0" up to
    four characters, and anything longer keeps only its last four characters.
    """
    whole_part = str(num).split(".")[0]
    # rjust pads on the left; the slice then drops leading extras
    return whole_part.rjust(4, "0")[-4:]

#Go through all data frames in result_files and convert the IDs to strings
# The whole column is rebuilt in one step. Writing formatted strings back cell
# by cell would rely on pandas silently upcasting the column if it was read as
# numeric, which newer pandas versions deprecate.
for df in result_files:
    id_col = "What are the last 4 digits of your cell phone number?"
    df[id_col] = df[id_col].map(format_id)

#%% Define a function to replace values in a dataframe, since the built-in one isn't working
def df_replace(df, col_name, old, new):
    """Replace the first occurrence of *old* in column *col_name* with *new*.

    Only the first matching row (in index order) is changed; any later rows
    holding *old* are left untouched. The frame is modified in place and
    nothing is returned. If the column is missing or contains no match, the
    call does nothing.
    """
    if col_name not in df.columns:
        return
    #Search the target column only: a value that appears elsewhere in the
    #frame but not in this column must not be treated as a match
    row_indices = df.index[df[col_name] == old].tolist()
    if row_indices:
        df.at[row_indices[0], col_name] = new

#%% Format ids in discourse themes doc
# Both ID columns are rebuilt column-wise so the formatted strings never have
# to be written one cell at a time into a column that may be numeric.
for id_col in ('ID', 'Organizer ID'):
    discourse[id_col] = discourse[id_col].map(format_id)

#%% Add made-up ids for new control group participants, since they didn't provide them

# random.sample draws without replacement, so no two of these participants
# share a made-up ID (it raises ValueError if there are more than 10000 rows).
# NOTE(review): the made-up IDs are not checked against the real IDs in the
# other sheets.
made_up_ids = [format_id(number)
               for number in random.sample(range(10000), len(ctrl_new.index))]

#Insert the finished ID column as the first column
ctrl_new.insert(loc = 0,
          column = 'ID',
          value = made_up_ids)

#%% Make lists of organizer and partner IDs, rename duplicate organizer IDs in all sheets, 
# and link organizer and partner IDs in a dictionary

#Drop empty rows; work on an explicit copy because replacement IDs are written
#into this frame below
data = participant_list.dropna(subset = [":Organizer name"]).copy()
#Replacement IDs are strings, so let the column hold arbitrary objects
data[':Organizer ID'] = data[':Organizer ID'].astype(object)

org_ids = []
par_ids = []
pairs = {}

#For each row index in the list of row indexes (i.e., each person)
for row_index in data.index.tolist():

    #Find the cell value under that row and the column ':Partner ID'; assign it to par_id
    par_id = format_id(data.at[row_index, ':Partner ID'])
    par_ids.append(par_id)
    #Find the cell value under that row and the column ':Organizer ID'; assign it to org_id
    org_id = format_id(data.at[row_index, ':Organizer ID'])
    #Check if the ID has been used (by an earlier organizer, or by any partner
    #seen so far, including this row's own partner)
    if org_id not in org_ids and org_id not in par_ids:
        org_ids.append(org_id)
        pairs[org_id]=par_id
        continue

    print("ID", org_id, "is a duplicate")
    #Generate new random ID
    new_org_id = format_id(random.randint(0000,9999))
    #Regenerate new random ID if it's already taken; the same two lists that
    #define a duplicate above are checked here
    while new_org_id in org_ids or new_org_id in par_ids:
        new_org_id = format_id(random.randint(0000,9999))

    #Replace the old ID with the new ID into the participant dataframe
    data.loc[row_index, ':Organizer ID'] = new_org_id
    #Append the new ID to org_ids
    org_ids.append(new_org_id)
    #Save the new ID as the key for the partner value in pairs dictionary
    pairs[new_org_id]=par_id
    #Go through all files with organizer data and replace the old ID with the new ID
    for df in organizer_files:
        df_replace(df, "What are the last 4 digits of your cell phone number?", org_id, new_org_id)
    df_replace(discourse, "Organizer ID", org_id, new_org_id)

#%% Replace variant with main IDs for organizers, partners, and control group members
var_ids = {'1266':'0124', '0158': '0124', '3273': '0255', '2087': '3087', 
            '3248': '8694', '5440': '9440', '0823':'6799', '3223': '7732', 
            '7649': '8682', '1476': '1477', '7290': '7920'}

phone_col = "What are the last 4 digits of your cell phone number?"

#Swap each variant for its main value in the discourse sheet and every result file
for variant, main_id in var_ids.items():
    df_replace(discourse, "ID", variant, main_id)
    for df in result_files:
        df_replace(df, phone_col, variant, main_id)

#Second pass over the partner followup sheet: every row whose ID is a known
#variant gets the main value
for row_index in par_followup.index.tolist():
    current_id = par_followup.at[row_index, phone_col]
    if current_id in var_ids:
        par_followup.loc[row_index, phone_col] = var_ids[current_id]

#%% Make a list of control IDS
#Keep only roster rows that have a name, then format each identifier
ctrl_data = control_list.dropna(subset = [":Name"])
ctrl_row_indices = ctrl_data.index.tolist()
ctrl_ids = [format_id(ctrl_data.at[row_index, ':Identifier'])
            for row_index in ctrl_row_indices]

#%% Count how many organizers, partners, and control group participants there are
group_counts = (
    (len(org_ids), "organizers"),
    (len(par_ids), "partners"),
    (len(pairs), "pairs"),
    (len(ctrl_ids), "survey-only control group participants"),
    (len(ctrl_new['Date Submitted']), "no-intervention control group participants"),
)
for count, group_label in group_counts:
    print("There are", count, group_label)
#%% Change Likert scale responses to numerical values
# Ordered (patterns, replacement) steps. Order matters: the specific political
# orientation and class phrases must be rewritten before the generic
# 'Somewhat.*' / 'Very.*' patterns at the end, which would otherwise turn
# e.g. 'Somewhat conservative' into '2' and 'Very conservative' into '4'.
likert_steps = [
    #Deal with the political orientations first
    ([r'Very progressive'], '4'),
    ([r'Somewhat progressive'], '3'),
    ([r'Neither progressive nor conservative'], '2'),
    ([r'Somewhat conservative'], '1'),
    ([r'Very conservative'], '0'),

    #Then deal with class
    ([r'Very wealthy'], '4'),
    ([r'Somewhat wealthy'], '3'),
    ([r'Not wealthy but not experiencing financial hardship either'], '2'),
    ([r'Experiencing some financial hardship'], '1'),
    ([r'Experiencing a lot of financial hardship'], '0'),

    #Finally the generic answer stems
    #'Not at all' -> 0
    ([r'Not at all.*'], '0'),
    #'Not very' or 'Slightly' -> 1
    ([r'Not very.*', r'Slightly.*'], '1'),
    #'Somewhat' or 'Neither' -> 2
    ([r'Somewhat.*', r'Neither.*'], '2'),
    #'Fairly' -> 3
    ([r'Fairly.*'], '3'),
    #'Very' -> 4
    ([r'Very.*'], '4'),
]

for df in result_files_with_new_ctrl:
    for patterns, value in likert_steps:
        #Every step is applied in place; a replace() call without inplace=True
        #returns a new frame and leaves df unchanged
        df.replace(
            to_replace=patterns,
            value=value,
            regex=True,
            inplace=True
        )
#%% Define a function, check_to_multi, that converts True/False multiple choice answer output into the content of the multiple choice answers. 
def check_to_multi(df, firstcol, lastcol, newcol):
    """Collapse a run of checkbox columns into one categorical column.

    The columns from *firstcol* through *lastcol* (inclusive) are read row by
    row. A cell counts as checked when its string form is anything other than
    "False". The new column *newcol* receives the header of the single checked
    column, "Multi" when more than one is checked, or "" when none is. The
    new column is placed where the checkbox columns were, and the checkbox
    columns are then removed. *df* is modified in place; nothing is returned.
    """
    #Get the position for the new column (right after the last checkbox column)
    newcol_index = df.columns.get_loc(lastcol) + 1
    #Names of the checkbox columns, first to last
    checkbox_cols = list(df.loc[:, firstcol:lastcol].columns)

    categories = []
    #Look cells up by index label so the function also works when the index
    #is not 0..n-1
    for row_index in df.index:
        checked = [col_name for col_name in checkbox_cols
                   if str(df.at[row_index, col_name]) != "False"]
        if not checked:
            categories.append("")
        elif len(checked) == 1:
            categories.append(checked[0])
        else:
            categories.append("Multi")

    #Insert the finished column in one step, then drop the obsolete columns
    df.insert(loc=newcol_index, column=newcol, value=categories)
    df.drop(columns=checkbox_cols, inplace=True)
#%% Convert the checkbox answers for race and religion to multiple choice
#(first checkbox column, last checkbox column, name of the combined column)
checkbox_groups = (
    ('American Indian or Alaska Native', 'Prefer not to answer', 'race'),
    ('Agnostic', 'Prefer not to answer2', 'religion'),
)
for df in demo:
    for first_header, last_header, combined in checkbox_groups:
        check_to_multi(df=df, firstcol=first_header, lastcol=last_header, newcol=combined)

#%% Simplify relationship answers

#Maps each raw answer to one of the simplified relationship categories
rel_dict = {'My acquaintance': 'acquaintance',
            'My co-worker': 'co-worker',
            'My family member': 'family member',
            'My friend': 'friend',
            'a younger person whose family I know and I\'ve had some interaction with over the past 8 years':'acquaintance',
            'boyfriend': 'significant other',
            'Colleague and friend': 'friend',
            'Introduced via 3rd person friend': 'acquaintance',
            'My boyfriend': 'significant other',
            'my friend\'s daughter': 'acquaintance',
            'Parter': 'significant other',
            'Sister\'s partner': 'family member'}

#Apply every mapping, one answer at a time, to both frames
for old_rel, new_rel in rel_dict.items():
    for frame in (org_pre1, ctrl_new):
        frame.replace(old_rel, new_rel, inplace=True)
#%% Define function cell_namer to fix column labels based on the name of the data frame
# If the data frame name contains 'org', set beginning to 'est_'
def cell_namer(df):
    """Derive column-label pieces from ``df.name``.

    Returns a tuple ``(beginning, ending, role)``:

    * beginning: 'est_' when the name contains 'org', otherwise ''.
    * role: 'org_', 'par_' or 'ctrl_' for names containing 'org', 'par' or
      'ctrl' (checked in that order); 'ctrl_new' for a name that contains
      'new' but none of the others. A name such as 'ctrl_new' therefore gets
      role 'ctrl_', because the 'ctrl' test comes first.
    * ending: '_pre1', '_1', '_2', '_3' or '_followup' for the first of those
      stage markers found in the name; '' for a name containing 'new' or no
      marker at all.
    """
    name = df.name

    beginning = ''
    role = ''

    if 'org' in name:
        beginning = 'est_'
        role = 'org_'
    elif 'par' in name:
        role = 'par_'
    elif 'ctrl' in name:
        role = 'ctrl_'
    elif 'new' in name:
        role = 'ctrl_new'

    #Set the ending to the stage of the df (pre1, 1, 2, 3, or followup).
    #'pre1' is tested before '1' because it contains a '1' itself.
    #Default to '' so a name without any stage marker still returns a value.
    ending = ''
    stage_markers = (
        ('pre1', '_pre1'),
        ('1', '_1'),
        ('2', '_2'),
        ('3', '_3'),
        ('followup', '_followup'),
        ('new', ''),
    )
    for marker, suffix in stage_markers:
        if marker in name:
            ending = suffix
            break

    return beginning, ending, role

#%% Define a function, score_combiner, that takes a dataframe, the names of two columns, and the label of the overall score, averages the cells in that column range for each row, and puts the result in a new column
def score_combiner(df, firstcol, lastcol, label):
    """Add a per-row composite score for the columns *firstcol*..*lastcol*.

    For each row, the string cells in the column range are converted with
    ``int`` and summed; cells that are not strings (e.g. NaN) add nothing.
    The sum is divided by the number of columns in the range, so a
    non-string cell effectively counts as 0 in the average. The result goes
    into a new column inserted right after *lastcol* and named with the
    beginning and ending that ``cell_namer`` returns for *df*, around
    *label*. *df* is modified in place; nothing is returned.
    """
    #Run cell_namer on the data frame to figure out how to label new variables
    beg, end, role = cell_namer(df)
    #Set the new column name
    newcol = beg + label + end
    #Get the position for the new column (to the right of the last column in the subset)
    newcol_index = df.columns.get_loc(lastcol) + 1
    #Names of the columns from the first to the last column header
    item_cols = list(df.loc[:, firstcol:lastcol].columns)

    scores = []
    #For each row (i.e., each person)
    for row_index in df.index:
        tally = 0
        for col_name in item_cols:
            cell = df.at[row_index, col_name]
            #Only string cells hold a converted answer; anything else is skipped
            if isinstance(cell, str):
                tally += int(cell)
        #Average over all columns in the range
        scores.append(tally / len(item_cols))

    #Insert the finished score column as floats
    df.insert(loc=newcol_index, column=newcol,
              value=pd.Series(scores, index=df.index, dtype=float))
#%% For each df, find composite scores for knowledge, concern, and likelihood
#(score label, first column of its range, last column of its range)
composite_ranges = (
    ('knowledge', 'Climate science', 'Climate solutions'),
    ('concern', 'Current local impacts of climate change2', 'Climate justice issues'),
    ('likelihood', 'Make lifestyle changes (such as eating less meat and flyin', 'Seek out more information about climate change, climate ju'),
)
for df in result_files_with_new_ctrl:
    #Run score_combiner once per composite, in the order listed above
    for score_label, range_start, range_end in composite_ranges:
        score_combiner(df=df, firstcol=range_start, lastcol=range_end, label=score_label)

#%% Rename columns to be more readable
for df in result_files_with_new_ctrl:

    #Run cell namer to find beginning and ending
    beg, end, role = cell_namer(df)
    headers = df.columns.tolist()

    #Collect every old-header -> new-header pair for this frame, then rename
    #once; headers that are not present in the frame are simply ignored
    renames = {
        "What are the last 4 digits of your cell phone number?": "ID",

        #Knowledge
        "Climate science": beg+"knowledge_science"+end,
        "Current local impacts of climate change": beg+"knowledge_local"+end,
        "Current global impacts of climate change": beg+"knowledge_global"+end,
        "Likely future impacts of climate change": beg+"knowledge_future"+end,
        "Root causes of climate change": beg+"knowledge_causes"+end,
        "Climate justice issues (for instance, how climate change a": beg+"knowledge_cj"+end,
        "Climate solutions": beg+"knowledge_solutions"+end,

        #Concern
        "Current local impacts of climate change2": beg+"concern_local"+end,
        "Current global impacts of climate change2": beg+"concern_global"+end,
        "Likely future impacts of climate change2": beg+"concern_future"+end,
        "Climate justice issues": beg+"concern_cj"+end,

        #Perceived efficacy (four wordings of the question share one label)
        "To what extent do you believe your actions can help counte": beg+"perceived_efficacy"+end,
        "To what extent do you think your conversational partner be": beg+"perceived_efficacy"+end,
        "To what extent do you currently believe your actions can h": beg+"perceived_efficacy"+end,
        "To what extent do you think your conversational partner cu": beg+"perceived_efficacy"+end,

        #Likelihood
        "Make lifestyle changes (such as eating less meat and flyin": beg+"likelihood_lifestyle"+end,
        "Take collective action (such as political organizing, cont": beg+"likelihood_collective"+end,
        "Talk to others about climate change, climate justice, or c": beg+"likelihood_talk"+end,
        "Seek out more information about climate change, climate ju": beg+"likelihood_seek_info"+end,

        #Rating
        "How well did the conversation go overall?": role+"rating"+end,
    }

    #Reflection: the second wording is only used when the first is absent
    if "Is there anything else you would like to share about this " in headers:
        renames["Is there anything else you would like to share about this "] = role+"reflection"+end
    elif "Is there anything else you would like to share?" in headers:
        renames["Is there anything else you would like to share?"] = role+"reflection"+end

    #Actions taken and barriers - only for org_2 and org_3
    if df.name in ('org_2', 'org_3'):
        renames.update({
            #Actions taken
            "Made lifestyle changes (such as eating less meat and flyin": "did_lifestyle",
            "Took collective action (such as political organizing) to s": "did_collective",
            "Talked to others about climate change, climate justice, or": "did_talk",
            "Sought out more information about climate change, climate ": "did_seek_info",
            "Other:": "did_other",

            #Barriers
            "Lack of free time": "barrier_time",
            "Feeling overwhelmed by the climate crisis": "barrier_overwhelm_climate",
            "Feeling overwhelmed by the amount of possible climate acti": "barrier_overwhelm_action",
            "Feeling overwhelmed with other things": "barrier_overwhelm_other",
            "Not having anyone to take action with": "barrier_alone",
            "Fearing repercussions for having taken climate action": "barrier_fear",
            "Other:2": "barrier_other",
        })

    df.rename(columns=renames, inplace=True)

#%% Rename demographic info columns for readability - only for dataframes containing demographic info
demo_renames = {
    "How old are you?": "age",
    "Which of the following categories best describes your gend": "gender",
    "How would you describe your financial situation?": "class",
    "In what state or territory do you live?": "location",
    "Which of the following best describes your political orien": "politics",
    "Have you been personally impacted by climate change?": "affectedness",
    "Have you ever been involved in climate action?": "past_action",
    "Please briefly describe your current involvement in climat": "current_action",
}
for df in demo:
    df.rename(columns=demo_renames, inplace=True)

#Rename interest columns for readability - only for partner pre-survey and control group survey 1
interest_renames = {
    "Climate change": "interest_general",
    "Climate justice": "interest_cj",
    "Climate solutions2": "interest_solutions",
    "How I can take action to stop climate change": "interest_action",
    "Other:": "interest_other",
}
for df in result_files:
    if df.name in ('par_pre1', 'ctrl_1'):
        df.rename(columns=interest_renames, inplace=True)

#Rename columns for readability in the organizer pre-1 dataframe
org_pre1.rename(columns={
    "Which of the following terms best describes the person wit": "relationship",
    "How close are you with this person?": "closeness",
    "Why did you choose this person to talk to?": "partner_choice_reason",
    "Normalizing talking about the climate crisis": "goal_normalize",
    "Communicating the seriousness of the climate crisis": "goal_severity",
    "Communicating the viability of climate solutions": "goal_solutions",
    "Communicating the need for climate justice": "goal_cj",
    "Encouraging my conversational partner to take collective c": "goal_action",
    "Other:": "goal_other",
}, inplace=True)

#Rename columns for readability in the followup dataframes
followup_renames = {
    "I feel more able to take action to stop climate change": "changes_more able to take action",
    "I feel more committed to taking climate action": "changes_more committed to action",
}
for df in [par_followup, ctrl_followup, ctrl_new]:
    df.rename(columns=followup_renames, inplace=True)

#Rename columns for readability in the control survey 1 and the control new survey
consent_renames = {
    "I would like my survey responses to be publicly archived t": "archive?",
    "I have read and understood the above information. By check": "consent",
}
for df in [ctrl_1, ctrl_new]:
    df.rename(columns=consent_renames, inplace=True)

#Rename columns for readability in the control surveys 2 and 3
for df in [ctrl_2, ctrl_3]:
    #The reflection label depends on the frame's role and stage
    beg, end, role = cell_namer(df)
    df.rename(columns={"Is there anything else you would like to share about these": role + "reflection" + end}, inplace=True)
#==============
# Attitudes
#==============

#%% Compute aggregate (estimated) knowledge, concern, and likelihood scores

#Compute aggregate action scores

#Define a function, true_tallier, to count the Trues in a set of columns for each row
def true_tallier (df, col_names, label):
    """Append a column *label* holding, per row, how many of *col_names* are True.

    A cell counts when it compares equal to ``True``. The counts are stored
    as floats in a new right-most column; with an empty *col_names* every
    row gets 0.0. *df* is modified in place; nothing is returned.
    """
    counts = []
    #For each row (i.e., each person)
    for row_index in df.index:
        #Equality (not identity) on purpose: cells may be numpy booleans
        trues = sum(1 for col_name in col_names
                    if df.at[row_index, col_name] == True)  # noqa: E712
        counts.append(float(trues))
    #Add the finished column at the far right of the frame
    df.insert(len(df.columns), label,
              pd.Series(counts, index=df.index, dtype=float))

#Define a function, true_combiner, that goes through values in several columns for each row and inputs "True" into a new column if any of them are true
def true_combiner (df, col_names, label):
    """Append a boolean column *label* that is True where any of *col_names* is True.

    A cell counts when it compares equal to ``True``; every other value,
    including NaN and text, counts as not true. The new column is added at
    the far right. *df* is modified in place; nothing is returned.
    """
    #Build the whole column first so that booleans never have to be written
    #one by one into a column created as float NaN
    flags = [
        #Equality (not identity) on purpose: cells may be numpy booleans
        any(df.at[row_index, col_name] == True for col_name in col_names)  # noqa: E712
        for row_index in df.index
    ]
    df.insert(len(df.columns), label,
              pd.Series(flags, index=df.index, dtype=bool))

#%% Find composite scores for collective action and advocacy, outreach, self-education, donation, and lifestyle changes
def action_scorer(df):
    """Add five action sub-scores and their sum to *df*.

    Adds the columns 'collective_action_advocacy' (count of True answers),
    'self_education' and 'outreach' (True if any of their answers is True),
    'lifestyle' and 'donation' (count over a single answer column), and
    finally 'aggregated_action_score', the per-row sum of those five after
    converting each with ``int``. *df* is modified in place; nothing is
    returned.
    """
    true_tallier(df, ["I joined a climate organization", "I attended a training", "I participated in another form of collective climate actio", "I testified about my concern at a public meeting", "I contacted my representative"], 'collective_action_advocacy')
    true_combiner(df, ["I sought out more information about climate issues", "I read a book or watched a video about climate issues"], 'self_education')
    true_combiner(df, ["I talked to a few people about climate issues", "I talked to many people about climate issues"], 'outreach')
    true_tallier(df, ["I made lifestyle changes (such as eating less meat and fly"], 'lifestyle')
    true_tallier(df, ["I donated money to a climate organization"], 'donation')

    component_cols = ['collective_action_advocacy', "self_education", "outreach", "lifestyle", "donation"]
    #int() turns the True/False components into 1/0 before summing
    totals = [
        float(sum(int(df.at[row_index, col_name]) for col_name in component_cols))
        for row_index in df.index
    ]
    df.insert(len(df.columns), "aggregated_action_score",
              pd.Series(totals, index=df.index, dtype=float))
#%% 
#Score the actions reported in each followup-style survey
for df in [par_followup, ctrl_followup, ctrl_new]:
    action_scorer(df)
#%% Compute aggregated attitude changes in followup

#The "learned something" question is worded differently in the new control survey
learned_col_by_frame = (
    (par_followup, "I learned something I didn't know before:"),
    (ctrl_followup, "I learned something I didn't know before:"),
    (ctrl_new, "I learned something I didn't know before about climate iss"),
)
for frame, learned_col in learned_col_by_frame:
    true_combiner(frame, [learned_col, "I have a deeper understanding of climate justice", "I have a better idea of where to find trustworthy informat"], "changes_knowledge")
    true_combiner(frame, ["My emotions around climate are more manageable now", "I am more hopeful about climate change", "I have more emotional support on climate issues"], "changes_emotion")

#%% Export result file dataframes to csv
#One file per frame, named after the frame's name attribute
for df in result_files_with_new_ctrl:
    df.to_csv(f'{df.name}.csv')

#==============
# Analysis
#==============
#%% Define a function to get a value for a given ID in a given column in a given dataframe
def get_result (id, col, df):
    """Return the value in column *col* for the first row whose "ID" equals *id*, as a float.

    Returns NaN when *df* has no "ID" column, when no row has that ID, or
    when the cell cannot be converted with ``float``.
    """
    if "ID" not in df.columns:
        return np.nan

    #Match against the ID column only; a value that merely appears somewhere
    #else in the frame is not a match
    row_indices = df.index[df["ID"] == id].tolist()
    if not row_indices:
        return np.nan

    value = df.at[row_indices[0], col]
    try:
        return float(value)
    except (TypeError, ValueError):
        #Non-numeric text or an unconvertible object
        return np.nan

#%% Define a function to get a string for a given ID in a given column in a given dataframe
def get_result_str (id, col, df):
    """Return the raw cell in column *col* for the first row whose "ID" equals *id*.

    The cell is returned unconverted. Returns NaN when *df* has no "ID"
    column or when no row has that ID.
    """
    if "ID" not in df.columns:
        return np.nan

    #Match against the ID column only; a value that merely appears somewhere
    #else in the frame is not a match
    row_indices = df.index[df["ID"] == id].tolist()
    if not row_indices:
        return np.nan

    return df.at[row_indices[0], col]

#%% How well did activists and partners each think the conversations went?

#Make dictionaries, org_ratings and par_ratings, linking organizers to their 3 ratings and partners to their 3 ratings
org_ratings = {}
par_ratings = {}
for id in org_ids:

    partner_id = pairs[id]

    # One rating per conversation; get_result yields NaN where a rating is missing
    org_ratings[id] = [get_result(id, "org_rating_1", org_1),
                       get_result(id, "org_rating_2", org_2),
                       get_result(id, "org_rating_3", org_3)]

    par_ratings[partner_id] = [get_result(partner_id, "par_rating_1", par_1),
                               get_result(partner_id, "par_rating_2", par_2),
                               get_result(partner_id, "par_rating_3", par_3)]

# Find the average organizer rating and partner rating.
# NaN never compares equal to anything, so `rating != np.nan` is always True and filters nothing;
# missing ratings have to be dropped with an explicit isnan test.
all_org_ratings = [rating
                   for org_id in org_ids
                   for rating in org_ratings[org_id]
                   if not np.isnan(rating)]
print("Organizer ratings:", all_org_ratings)
avg_all_org_ratings = mean(all_org_ratings)
print("On average, activists rated the conversations a " + str(avg_all_org_ratings) + " out of " + str(likert) + ".")

all_par_ratings = [rating
                   for org_id in org_ids
                   for rating in par_ratings[pairs[org_id]]
                   if not np.isnan(rating)]
avg_all_par_ratings = mean(all_par_ratings)
print("On average, partners rated the conversations a", avg_all_par_ratings, "out of", likert)
#Findings: activists rated the conversations a 3.53/4 so far, while partners rated them a 3.85/4.
#----------
# Attitudes 
#----------

#%% Make a list of partner and control IDs who have taken the followup
# Order: IDs in par_followup, then ctrl_ids, then IDs in ctrl_new.
# NOTE(review): ctrl_ids is defined outside this view -- presumably the survey-only control IDs.
# Confirm every one of them appears in ctrl_followup, because the loop below assigns groups
# by membership in par_followup / ctrl_followup / ctrl_new.
ids_fol = par_followup["ID"].tolist() + ctrl_ids + ctrl_new["ID"].tolist()
#%% Combine all attitude, action, and discourse data into one dataframe, results

# One list per output column; each gets exactly one entry per participant in the loop below.
# Every name is bound to its own, separate empty list.
group = []

#Demographic data
(politics, race, gender, age, religion,
 affectedness, finances, closeness, relationship) = ([] for _ in range(9))

#Discourse variables
(action_talk, justice_talk, barrier_talk, invitation_talk, suggestion_talk, solution_talk,
 challenge_lifestyle_talk, challenge_lifestyle_talk_par,
 action_planning, action_explaining, action_effectiveness, action_orgs,
 action_experience, action_support, action_skills, action_wins,
 lifestyle_talk, lifestyle_talk_par) = ([] for _ in range(18))

#Attitudes
(concern_prior, concern_1, concern_2, concern_3,
 concern_followup, concern_delta) = ([] for _ in range(6))
(knowledge_prior, knowledge_1, knowledge_2, knowledge_3,
 knowledge_followup, knowledge_delta) = ([] for _ in range(6))
(efficacy_prior, efficacy_1, efficacy_2, efficacy_3,
 efficacy_followup, efficacy_delta) = ([] for _ in range(6))
(likelihood_prior, likelihood_1, likelihood_2, likelihood_3,
 likelihood_followup, likelihood_mean, likelihood_delta) = ([] for _ in range(7))
(lik_col_prior, lik_col_1, lik_col_2, lik_col_3,
 lik_col_followup, lik_col_mean, lik_col_delta) = ([] for _ in range(7))
(cj_concern_prior, cj_concern_1, cj_concern_2, cj_concern_3,
 cj_concern_followup, cj_concern_delta) = ([] for _ in range(6))
(cj_knowledge_prior, cj_knowledge_1, cj_knowledge_2, cj_knowledge_3,
 cj_knowledge_followup, cj_knowledge_delta) = ([] for _ in range(6))

#Action outcomes
(tot_action, col_action, lifestyle,
 donation, self_ed, outreach) = ([] for _ in range(6))

# ---- Lookup tables shared by all three groups -------------------------------------------

# Demographic answers read as raw values: (output list, survey column)
demographic_str_columns = [(race, 'race'), (gender, 'gender'), (age, 'age'),
                           (religion, 'religion'), (affectedness, 'affectedness'),
                           (finances, 'class')]

# Discourse variables: (output list, column in the `discourse` dataframe)
discourse_columns = [(action_talk, 'Action talk'),
                     (justice_talk, 'Justice talk'),
                     (barrier_talk, 'Barrier talk'),
                     (invitation_talk, 'Invitations'),
                     (suggestion_talk, 'Suggestions'),
                     (solution_talk, 'Solution talk'),
                     (challenge_lifestyle_talk, 'Challenging lifestyle changes'),
                     (challenge_lifestyle_talk_par, 'Challenging lifestyle changes - Partner'),
                     (action_planning, 'Action - planning'),
                     (action_explaining, 'Action - explaining'),
                     (action_effectiveness, 'Action - effectiveness'),
                     (action_orgs, 'Action - org talk'),
                     (action_experience, 'Action - experiences'),
                     (action_support, 'Action - supports'),
                     (action_skills, 'Action - skills/identity'),
                     (action_wins, 'Action - wins and benefits'),
                     (lifestyle_talk, 'Lifestyle talk'),
                     (lifestyle_talk_par, 'Lifestyle talk - Partner')]

# Action outcomes: (output list, column in the followup / ctrl_new dataframe)
action_outcome_columns = [(tot_action, 'aggregated_action_score'),
                          (col_action, 'collective_action_advocacy'),
                          (lifestyle, 'lifestyle'),
                          (donation, 'donation'),
                          (self_ed, 'self_education'),
                          (outreach, 'outreach')]

# Attitude measures:
# (column stem, prior, wave 1, wave 2, wave 3, followup, delta, mean list or None)
attitude_measures = [
    ('concern', concern_prior, concern_1, concern_2, concern_3,
     concern_followup, concern_delta, None),
    ('knowledge', knowledge_prior, knowledge_1, knowledge_2, knowledge_3,
     knowledge_followup, knowledge_delta, None),
    ('perceived_efficacy', efficacy_prior, efficacy_1, efficacy_2, efficacy_3,
     efficacy_followup, efficacy_delta, None),
    ('likelihood', likelihood_prior, likelihood_1, likelihood_2, likelihood_3,
     likelihood_followup, likelihood_delta, likelihood_mean),
    ('likelihood_collective', lik_col_prior, lik_col_1, lik_col_2, lik_col_3,
     lik_col_followup, lik_col_delta, lik_col_mean),
    ('concern_cj', cj_concern_prior, cj_concern_1, cj_concern_2, cj_concern_3,
     cj_concern_followup, cj_concern_delta, None),
    ('knowledge_cj', cj_knowledge_prior, cj_knowledge_1, cj_knowledge_2, cj_knowledge_3,
     cj_knowledge_followup, cj_knowledge_delta, None),
]

# Lists filled with NaN for both control groups (they had no conversations). Defined before
# the loop so they exist no matter which group happens to be processed first.
dvs = [out for out, _column in discourse_columns]

# Repeated-measure lists filled with NaN for the second control group
extra_meas = [concern_1, concern_2, concern_3, concern_followup, concern_delta,
              knowledge_1, knowledge_2, knowledge_3, knowledge_followup, knowledge_delta,
              efficacy_1, efficacy_2, efficacy_3, efficacy_followup, efficacy_delta,
              likelihood_1, likelihood_2, likelihood_3, likelihood_followup, likelihood_mean, likelihood_delta,
              lik_col_1, lik_col_2, lik_col_3, lik_col_followup, lik_col_mean, lik_col_delta,
              cj_concern_1, cj_concern_2, cj_concern_3, cj_concern_followup, cj_concern_delta,
              cj_knowledge_1, cj_knowledge_2, cj_knowledge_3, cj_knowledge_followup, cj_knowledge_delta]

# Reverse lookup from partner ID to organizer ID, built once instead of scanning `pairs` for
# every participant. setdefault keeps the first organizer listed for a partner, like list.index().
org_by_partner = {}
for org_id, par_id in pairs.items():
    org_by_partner.setdefault(par_id, org_id)

# Group membership, computed once rather than rebuilding the ID lists on every iteration
treatment_ids = set(par_followup["ID"].tolist())
survey_control_ids = set(ctrl_followup["ID"].tolist())
new_control_ids = set(ctrl_new["ID"].tolist())


def _append_columns(pid, frame, targets, getter):
    """Append getter(pid, column, frame) to the output list of every (list, column) pair."""
    for out, column in targets:
        out.append(getter(pid, column, frame))


def _append_attitudes(pid, prior_df, prior_suffix, wave1_df, wave2_df, wave3_df, followup_df):
    """Append one participant's attitude scores to every list in attitude_measures.

    The prior score is read from column `stem + prior_suffix` of `prior_df`; waves 1-3 and the
    followup are read from `stem + '_1'`, `'_2'`, `'_3'` and `'_followup'` of their dataframes.
    Pass wave1_df=None for a group without a wave-1 survey: wave 1 is then recorded as NaN
    and left out of the mean. The delta is always wave 3 minus prior.
    """
    for stem, prior, wave1, wave2, wave3, followup, delta, avg in attitude_measures:
        prior_val = get_result(pid, stem + prior_suffix, prior_df)
        wave1_val = np.nan if wave1_df is None else get_result(pid, stem + '_1', wave1_df)
        wave2_val = get_result(pid, stem + '_2', wave2_df)
        wave3_val = get_result(pid, stem + '_3', wave3_df)

        prior.append(prior_val)
        wave1.append(wave1_val)
        wave2.append(wave2_val)
        wave3.append(wave3_val)
        followup.append(get_result(pid, stem + '_followup', followup_df))
        delta.append(wave3_val - prior_val)

        # Only the two likelihood measures carry a mean column
        if avg is not None:
            if wave1_df is None:
                avg.append((prior_val + wave2_val + wave3_val) / 3)
            else:
                avg.append((prior_val + wave1_val + wave2_val + wave3_val) / 4)


# ---- Collect one entry per participant in every output list -----------------------------
for id in ids_fol:
    if id in treatment_ids:

        #Demographic variables
        group.append('treatment')
        # closeness and relationship were reported by the organizer paired with this partner
        org = org_by_partner[id]
        closeness.append(get_result(org, 'closeness', org_pre1))
        relationship.append(get_result_str(org, 'relationship', org_pre1))
        politics.append(get_result(id, 'politics', par_pre1))
        _append_columns(id, par_pre1, demographic_str_columns, get_result_str)

        #Discourse variables
        _append_columns(id, discourse, discourse_columns, get_result)

        #Attitudes -- prior from the pre-conversation survey, then one survey per conversation
        _append_attitudes(id, par_pre1, '_pre1', par_1, par_2, par_3, par_followup)

        #Actions
        _append_columns(id, par_followup, action_outcome_columns, get_result)

    elif id in survey_control_ids:

        #Demographic variables
        group.append('control')
        closeness.append(np.nan)
        relationship.append(np.nan)
        politics.append(get_result(id, 'politics', ctrl_1))
        _append_columns(id, ctrl_1, demographic_str_columns, get_result_str)

        #Discourse variables -- all NaN since this is the control group and they didn't have conversations
        for dv in dvs:
            dv.append(np.nan)

        #Attitudes -- survey 1 serves as the prior measurement, so there is no separate wave 1
        _append_attitudes(id, ctrl_1, '_1', None, ctrl_2, ctrl_3, ctrl_followup)

        #Actions
        _append_columns(id, ctrl_followup, action_outcome_columns, get_result)

    elif id in new_control_ids:

        #Demographic variables
        group.append('control_new')
        closeness.append(np.nan)
        relationship.append(np.nan)
        politics.append(get_result(id, 'politics', ctrl_new))
        _append_columns(id, ctrl_new, demographic_str_columns, get_result_str)

        #Discourse variables -- all NaN since this is the second control group and they didn't have conversations
        for dv in dvs:
            dv.append(np.nan)

        #Attitudes -- only a single measurement exists, stored as the prior; everything else is NaN
        for meas in extra_meas:
            meas.append(np.nan)

        concern_prior.append(get_result(id, "concern", ctrl_new))
        knowledge_prior.append(get_result(id, "knowledge", ctrl_new))
        efficacy_prior.append(get_result(id, "perceived_efficacy", ctrl_new))
        likelihood_prior.append(get_result(id, "likelihood", ctrl_new))
        lik_col_prior.append(get_result(id, 'likelihood_collective', ctrl_new))
        cj_concern_prior.append(get_result(id, "concern_cj", ctrl_new))
        cj_knowledge_prior.append(get_result(id, "knowledge_cj", ctrl_new))

        #Actions
        _append_columns(id, ctrl_new, action_outcome_columns, get_result)

    else:
        # Skipping the ID would leave every list one entry short of ids_fol and only fail
        # later, when the dataframe is built, with an unhelpful length-mismatch error.
        raise ValueError(f"ID {id!r} is in ids_fol but not in par_followup, ctrl_followup, or ctrl_new")

#Make a dictionary of reported attitudes and turn it into a dataframe
# Keys become the column names of `results`; every list must have one entry per ID in ids_fol.
results_dict = {
    'ID': ids_fol,
    'group': group,
    # Demographics
    'closeness': closeness, 'relationship': relationship, 'politics': politics,
    'race': race, 'gender': gender, 'age': age, 'religion': religion,
    'affectedness': affectedness, 'finances': finances,
    # Discourse variables
    'action_talk': action_talk, 'justice_talk': justice_talk, 'barrier_talk': barrier_talk,
    'invitation_talk': invitation_talk, 'suggestion_talk': suggestion_talk,
    'solution_talk': solution_talk,
    'challenge_lifestyle_talk': challenge_lifestyle_talk,
    'challenge_lifestyle_talk_par': challenge_lifestyle_talk_par,
    'action_planning': action_planning, 'action_explaining': action_explaining,
    'action_effectiveness': action_effectiveness, 'action_orgs': action_orgs,
    'action_experience': action_experience, 'action_support': action_support,
    'action_skills': action_skills, 'action_wins': action_wins,
    'lifestyle_talk': lifestyle_talk, 'lifestyle_talk_par': lifestyle_talk_par,
    # Attitudes
    'concern_prior': concern_prior, 'concern_1': concern_1, 'concern_2': concern_2,
    'concern_3': concern_3, 'concern_followup': concern_followup,
    'concern_delta': concern_delta,
    'knowledge_prior': knowledge_prior, 'knowledge_1': knowledge_1, 'knowledge_2': knowledge_2,
    'knowledge_3': knowledge_3, 'knowledge_followup': knowledge_followup,
    'knowledge_delta': knowledge_delta,
    'efficacy_prior': efficacy_prior, 'efficacy_1': efficacy_1, 'efficacy_2': efficacy_2,
    'efficacy_3': efficacy_3, 'efficacy_followup': efficacy_followup,
    'efficacy_delta': efficacy_delta,
    'likelihood_prior': likelihood_prior, 'likelihood_1': likelihood_1,
    'likelihood_2': likelihood_2, 'likelihood_3': likelihood_3,
    'likelihood_followup': likelihood_followup, 'likelihood_mean': likelihood_mean,
    'likelihood_delta': likelihood_delta,
    'lik_col_prior': lik_col_prior, 'lik_col_1': lik_col_1, 'lik_col_2': lik_col_2,
    'lik_col_3': lik_col_3, 'lik_col_followup': lik_col_followup,
    'lik_col_mean': lik_col_mean, 'lik_col_delta': lik_col_delta,
    'cj_concern_prior': cj_concern_prior, 'cj_concern_1': cj_concern_1,
    'cj_concern_2': cj_concern_2, 'cj_concern_3': cj_concern_3,
    'cj_concern_followup': cj_concern_followup, 'cj_concern_delta': cj_concern_delta,
    'cj_knowledge_prior': cj_knowledge_prior, 'cj_knowledge_1': cj_knowledge_1,
    'cj_knowledge_2': cj_knowledge_2, 'cj_knowledge_3': cj_knowledge_3,
    'cj_knowledge_followup': cj_knowledge_followup, 'cj_knowledge_delta': cj_knowledge_delta,
    # Action outcomes
    'tot_action': tot_action, 'col_action': col_action, 'lifestyle': lifestyle,
    'donation': donation, 'self_ed': self_ed, 'outreach': outreach,
}

results = pd.DataFrame(results_dict)

#Replace 'nan' strings with NaN
# DataFrame.replace returns a new dataframe, so the result has to be assigned back;
# calling it without assignment leaves `results` untouched.
results = results.replace('nan', np.nan)

#%% Simplify affectedness answers
# 'No' and 'I’m not sure' are kept as they are and every other answer is collapsed to 'Yes'.
# Missing answers (NaN, or the string 'nan') stay missing: without this check they would fall
# into the "everything else" branch and be counted as 'Yes'.

effects = []

for answer in results['affectedness']:
    if pd.isna(answer) or answer == 'nan':
        effects.append(np.nan)
    elif answer in ['No', 'I’m not sure']:
        effects.append(answer)
    else:
        effects.append('Yes')

results['affectedness'] = effects

#%% Results to CSV
results.to_csv('combined_results.csv')

#%% Split results dataframe into par_res, ctrl_res, and new_ctrl_res
# par_ctrl keeps the two groups with repeated measurements (treatment and survey-only control)
par_res = results.loc[results['group'].eq('treatment')]
ctrl_res = results.loc[results['group'].eq('control')]
new_ctrl_res = results.loc[results['group'].eq('control_new')]
par_ctrl = results.loc[results['group'].ne('control_new')]

#%% Define a Mixed ANOVA function
def mix_anova (df, var_pre, var_post, value_name):
    """Run and print a mixed ANOVA of one measure, with 'group' between and 'time' within subjects.

    Parameters
    ----------
    df : dataframe with 'ID' and 'group' columns plus the two measurement columns
    var_pre : column holding the earlier measurement (labelled 'pre')
    var_post : column holding the later measurement (labelled 'post-3')
    value_name : name given to the score column in the long-format data

    Prints the ANOVA table followed by sphericity, normality and homoscedasticity checks.
    Returns nothing.
    """
    # Only rows with both measurements present take part in the analysis
    complete = df[df[var_pre].notna() & df[var_post].notna()]

    # Long format: one row per participant and time point
    long_df = pd.melt(complete, id_vars=['ID', 'group'], value_vars=[var_pre, var_post],
                      var_name='time', value_name=value_name)
    time_labels = {var_pre: 'pre', var_post: 'post-3'}
    long_df['time'] = long_df['time'].map(time_labels)

    anova_table = mixed_anova(data=long_df, dv=value_name, within='time',
                              between='group', subject='ID')
    pg.print_table(anova_table)

    # Mauchly's test of sphericity; only the last field of the result is printed
    # (the p-value according to pingouin's documentation -- confirm)
    print("Mauchly's test of sphericity")
    sphericity_result = pg.sphericity(data=long_df, dv=value_name, subject='ID', within='time')
    print(sphericity_result[-1])

    # Shapiro-Wilk normality test within each group x time cell
    print("Shapiro-Wilk's test that residuals are normally distributed")
    long_df['factor_comb'] = long_df["group"] + '-' + long_df["time"]
    print(pg.normality(long_df, dv=value_name, group='factor_comb'))

    # Levene's test of equal variances between groups, separately for each time point
    pre_long = pd.melt(complete.reset_index(), id_vars=['ID', 'group'], value_vars=[var_pre])
    post_long = pd.melt(complete.reset_index(), id_vars=['ID', 'group'], value_vars=[var_post])
    checks = [("Levene's test of homoscedascicity - Pre-study", pre_long),
              ("Levene's test of homoscedascicity - Post-3", post_long)]
    for heading, frame in checks:
        print(heading)
        print(pg.homoscedasticity(frame, dv='value', group='group'))

#%% Mixed anova -- efficacy change by group
mix_anova(df=par_ctrl, var_pre='efficacy_prior', var_post='efficacy_3', value_name='efficacy_score')

#%% Mixed anova -- concern change by group
mix_anova(df=par_ctrl, var_pre='concern_prior', var_post='concern_3', value_name='concern_score')

#%% Mixed anova -- likelihood change by group
mix_anova(df=par_ctrl, var_pre='likelihood_prior', var_post='likelihood_3', value_name='likelihood_score')

#%% Mixed anova -- knowledge change by group
mix_anova(df=par_ctrl, var_pre='knowledge_prior', var_post='knowledge_3', value_name='knowledge_score')


#%% Subset the control 1 dataframe to include only responses who are in ctrl_res
# Keeps the ctrl_1 rows whose ID also occurs in ctrl_res
ctrl_1_complete = ctrl_1.loc[ctrl_1["ID"].isin(ctrl_res["ID"].tolist())]
ctrl_1_complete

#%% Make a barplot of average # of actions per group by specific action type

action_types = ['Talking to\nothers', 'Seeking more\ninformation', 'Lifestyle\nchanges', 
                'Collective action\nand advocacy', 'Donating\nmoney']

# Columns of the results dataframes, listed in the same order as the labels in action_types
avg_action_columns = ['outreach', 'self_ed', 'lifestyle', 'col_action', 'donation']

actions_par = [mean(par_res[column]) for column in avg_action_columns]
actions_ctrl = [mean(ctrl_res[column]) for column in avg_action_columns]
actions_ctrl_new = [mean(new_ctrl_res[column]) for column in avg_action_columns]

plt.figure()

# Number of bar groups (one per action type), their x positions, and the width of a single bar
N = 5
ind = np.arange(N)
width = 0.3

# One set of bars per study group, each shifted right by one bar width
avg_bar_sets = [
    (ind, actions_par, f'Treatment (n={len(par_followup["ID"])})'),
    (ind + width, actions_ctrl, f'Survey-only control (n={len(ctrl_1_complete["ID"])})'),
    (ind + width + width, actions_ctrl_new, f'No-intervention control (n={len(ctrl_new["ID"])})'),
]
for positions, heights, legend_label in avg_bar_sets:
    plt.bar(positions, heights, width, label=legend_label)

plt.ylabel("Average number of actions per participant")

# Tick labels sit under the middle bar of each group
plt.xticks(ind + width, action_types, fontsize=7)

# Let matplotlib pick the legend position
plt.legend(loc='best', prop={'size': 7})

plt.savefig('actions_by_group_avg.pdf', bbox_inches='tight')

#%% Display types of actions taken across groups in a table

# One group label per averaged action value, in the order treatment, survey-only, no-intervention
grp = (['treatment'] * len(actions_par)
       + ['survey-only'] * len(actions_ctrl)
       + ['no-intervention'] * len(actions_ctrl_new))

len(grp)
#%%
# One row per group (treatment, survey-only, no-intervention), one column per action type
group_rows = [actions_par, actions_ctrl, actions_ctrl_new]
actions_by_grp = pd.DataFrame(data=group_rows, columns=action_types)
actions_by_grp
#%% Make barplot of percent of people in each group who took various actions

#Define a function to get the share of rows with a non-NaN, positive value in a column
def count_yes_percent (df, col_name):
    """Return the fraction of rows in df[col_name] whose value is greater than 0.

    Despite the name, the result is a fraction between 0 and 1, not a percentage.
    The denominator is the total number of rows, so rows with a missing value count as "no".

    Parameters
    ----------
    df : pandas DataFrame
    col_name : label of the column to inspect

    Returns
    -------
    float
        Fraction of rows with a positive value; NaN when the column has no rows.
    """
    # Cells that cannot be read as numbers become NaN and therefore never count as "yes"
    values = pd.to_numeric(df[col_name], errors='coerce')
    if len(values) == 0:
        # No respondents: the share is undefined rather than a division by zero
        return float('nan')
    percent_yes = int((values > 0).sum()) / len(values)
    return percent_yes

# Columns of the followup / ctrl_new dataframes, in the order the per-group values are unpacked below
followup_action_columns = ['collective_action_advocacy', 'self_education', 'outreach',
                           'lifestyle', 'donation']

(par_collective_percent, par_self_ed_percent, par_outreach_percent,
 par_lifestyle_percent, par_donation_percent) = [
    count_yes_percent(par_followup, column) for column in followup_action_columns]

(ctrl_collective_percent, ctrl_self_ed_percent, ctrl_outreach_percent,
 ctrl_lifestyle_percent, ctrl_donation_percent) = [
    count_yes_percent(ctrl_followup, column) for column in followup_action_columns]

(ctrl_new_collective_percent, ctrl_new_self_ed_percent, ctrl_new_outreach_percent,
 ctrl_new_lifestyle_percent, ctrl_new_donation_percent) = [
    count_yes_percent(ctrl_new, column) for column in followup_action_columns]

action_types = ['Talking to\nothers', 'Seeking more\ninformation', 'Lifestyle\nchanges', 
                'Collective action\nand advocacy', 'Donating\nmoney']

# Shares per group, reordered to line up with the labels in action_types
percent_of_partners = [par_outreach_percent, par_self_ed_percent, par_lifestyle_percent,
                    par_collective_percent, par_donation_percent]

percent_of_ctrl = [ctrl_outreach_percent, ctrl_self_ed_percent, ctrl_lifestyle_percent,
                    ctrl_collective_percent, ctrl_donation_percent]

percent_of_new_ctrl = [ctrl_new_outreach_percent, ctrl_new_self_ed_percent, ctrl_new_lifestyle_percent,
                    ctrl_new_collective_percent, ctrl_new_donation_percent]

#%%Calculate 95% confidence intervals for each proportion

#Define a function to calculate a 95% confidence interval of a proportion (Z=1.96)
def ci_range_prop(prop, n):
    """Return the half-width of the 95% normal-approximation confidence interval of a proportion.

    Computed as 1.96 * sqrt(prop * (1 - prop) / n).

    Parameters
    ----------
    prop : the observed proportion (between 0 and 1)
    n : the number of observations the proportion is based on

    Returns
    -------
    float
        The half-width of the interval; NaN when n is not positive, because the
        interval is undefined without observations.
    """
    # Imported locally so the function does not depend on a file-level `import math`
    import math

    if n <= 0:
        return float('nan')
    ci_range = 1.96 * math.sqrt(prop*(1-prop)/n)
    return ci_range

# Confidence-interval half-widths for each group's proportions, using the group's row count as n
par_cis = [ci_range_prop(share, len(par_res)) for share in percent_of_partners]
ctrl_cis = [ci_range_prop(share, len(ctrl_res)) for share in percent_of_ctrl]
new_ctrl_cis = [ci_range_prop(share, len(new_ctrl_res)) for share in percent_of_new_ctrl]

    
#%%

from matplotlib.ticker import PercentFormatter

plt.figure()

# Number of bar groups (one per action type), their x positions, and the width of a single bar
N = 5
ind = np.arange(N)
width = 0.3

# One set of bars per study group, each shifted right by one bar width, with CI error bars
percent_bar_sets = [
    (ind, percent_of_partners, f'Treatment (n={len(par_followup["ID"])})', par_cis),
    (ind + width, percent_of_ctrl, f'Survey-only control (n={len(ctrl_1_complete["ID"])})', ctrl_cis),
    (ind + width + width, percent_of_new_ctrl, f'No-intervention control (n={len(ctrl_new["ID"])})', new_ctrl_cis),
]
for positions, heights, legend_label, errors in percent_bar_sets:
    plt.bar(positions, heights, width, label=legend_label, yerr=errors)

plt.ylabel("% of participants")

# The plotted values are fractions; PercentFormatter(1) displays 1.0 as 100%
plt.gca().yaxis.set_major_formatter(PercentFormatter(1))

# Tick labels sit under the middle bar of each group
plt.xticks(ind + width, action_types, fontsize=7)

# Let matplotlib pick the legend position
plt.legend(loc='best', prop={'size': 7})

plt.savefig('actions_by_group_percent.pdf', bbox_inches='tight')
#%% Make a barplot of partners' actions
# Labels are listed in the same order as the values in percent_of_partners
partner_action_labels = ["Outreach", "Seeking more\ninformation", "Lifestyle changes",
                         "Collective action", "Donation"]

plt.figure(figsize = (11, 6))
plt.bar(partner_action_labels, percent_of_partners)
plt.ylabel("% of participants")
plt.title("Partners learned and talked more, but didn't take collective action")
plt.savefig('partner_actions_by_type.pdf', bbox_inches='tight')
#%% Make a barplot of actions taken in between conversations

#For each pair, check if they took any type of action between C1 and C2

# For every organizer, record whether any action was reported in the survey
# after conversation 2 (org_2), in the survey after conversation 3 (org_3),
# and whether the paired partner's follow-up survey shows any action at all.
reported_action_cols = ('did_lifestyle', 'did_collective', 'did_talk', 'did_seek_info')

act_1_to_2 = []
act_2_to_3 = []
act_1_to_fol = []

for org_id in org_ids:
    # Fetch every answer up front so a failing lookup surfaces regardless of
    # the values of the other answers.
    reports_after_2 = [get_result(org_id, col, org_2) for col in reported_action_cols]
    reports_after_3 = [get_result(org_id, col, org_3) for col in reported_action_cols]

    # pairs[org_id] is the ID looked up in par_followup -- presumably the
    # partner matched to this organizer; verify against where pairs is built.
    tot_act = get_result(pairs[org_id], 'aggregated_action_score', par_followup)

    # Compare against True explicitly (not truthiness) so that text answers
    # or NaN are not counted as a reported action.
    act_1_to_2.append(any(answer == True for answer in reports_after_2))
    act_2_to_3.append(any(answer == True for answer in reports_after_3))

    if tot_act > 0:
        act_1_to_fol.append(True)
    elif tot_act == 0:
        act_1_to_fol.append(False)
    else:
        # Neither > 0 nor == 0 (e.g. NaN): keep the entry as missing.
        # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
        act_1_to_fol.append(np.nan)

rep_act = pd.DataFrame({'ID': org_ids,
                'act_1_to_2': act_1_to_2,
                'act_2_to_3': act_2_to_3,
                'act_1_to_fol': act_1_to_fol})

act_new_1_to_2 = rep_act.query('act_1_to_2 == True')
print('# of partners who started taking action between 1 and 2:', len(act_new_1_to_2))
#25

act_new_2_to_3 = rep_act.query('act_1_to_2 == False and act_2_to_3 == True')
print('# of partners who started taking action between 2 and 3:', len(act_new_2_to_3))
#7

act_new_3_to_fol = rep_act.query('act_1_to_2 == False and act_2_to_3 == False and act_1_to_fol == True')
print('# of partners who started taking action between 3 and followup:', len(act_new_3_to_fol))
#7

act_cont = rep_act.query('act_1_to_2 == True and act_2_to_3 == True')
print('# of partners who continued taking action between 1 and 2 and 2 and 3:', len(act_cont))
#22

#Define a function to count the amount of mentions of action reported by the activists
def reported_action_counter(df, col):
    """Count the entries of ``df[col]`` that are not a "false" marker.

    An entry counts as "false" when it is the boolean ``False`` (or anything
    else equal to it, such as ``0``) or a string spelling "false" in any
    letter case, ignoring surrounding whitespace.  Every other entry counts
    as a reported action.

    Args:
        df: DataFrame holding the survey responses.
        col: Name of the column to tally.

    Returns:
        int: number of entries in the column that are not "false".
    """
    def _is_false(value):
        # Some FALSE cells arrive as text (seen in 'did_other'), so strings
        # are normalised before comparing instead of relying on exact spelling.
        if isinstance(value, str):
            return value.strip().lower() == 'false'
        # Same equality test that list.count(False) applies to non-strings.
        return value == False

    reported_actions = df[col].tolist()
    false_count = sum(1 for value in reported_actions if _is_false(value))
    # NOTE(review): missing values (NaN) are not "false", so they are counted
    # as actions here, exactly as before -- confirm that is intended.
    action_count = len(reported_actions) - false_count
    return(action_count)

#Run the function for org_2 and org_3
# Tallies from the survey after conversation 2 (org_2) ...
reported_seek_post2 = reported_action_counter(org_2, 'did_seek_info')
reported_talk_post2 = reported_action_counter(org_2, 'did_talk')
reported_lifestyle_post2 = reported_action_counter(org_2, 'did_lifestyle')
reported_other_post2 = reported_action_counter(org_2, 'did_other')
reported_collective_post2 = reported_action_counter(org_2, 'did_collective')

# ... and from the survey after conversation 3 (org_3).
reported_seek_post3 = reported_action_counter(org_3, 'did_seek_info')
reported_talk_post3 = reported_action_counter(org_3, 'did_talk')
reported_lifestyle_post3 = reported_action_counter(org_3, 'did_lifestyle')
reported_other_post3 = reported_action_counter(org_3, 'did_other')
reported_collective_post3 = reported_action_counter(org_3, 'did_collective')

# One column per reporting period, one row per action type.
action_reports_dict = {
    'Between first and second conversations': [
        reported_seek_post2,
        reported_talk_post2,
        reported_lifestyle_post2,
        reported_other_post2,
        reported_collective_post2,
    ],
    'Between second and third conversations': [
        reported_seek_post3,
        reported_talk_post3,
        reported_lifestyle_post3,
        reported_other_post3,
        reported_collective_post3,
    ],
}

action_report_rows = ['Seeking more information', 'Talking to others', 'Lifestyle changes', 'Other form of action', 'Collective action']
action_reports = pd.DataFrame(action_reports_dict, index=action_report_rows)

#Make a barplot showing reports of actions after conversation 2 versus conversation 3
# Numbers of pairs of bars you want
# Grouped bar chart: action mentions after conversation 2 vs. conversation 3.
N = 5
ind = np.arange(N)
width = 0.3

# Bar heights for the first (blue) and second (orange) series.
blue_bar = (reported_seek_post2, reported_talk_post2, reported_lifestyle_post2, reported_other_post2, reported_collective_post2)
orange_bar = (reported_seek_post3, reported_talk_post3, reported_lifestyle_post3, reported_other_post3, reported_collective_post3)

action_tick_labels = ('Seeking more information', 'Talking to others', 'Lifestyle changes', 'Other form of action', 'Collective action')

plt.figure(figsize=(10, 5))
plt.bar(ind, blue_bar, width, label='Between first and second conversations')
plt.bar(ind + width, orange_bar, width, label='Between second and third conversations')
plt.legend(loc="upper left")
plt.xlabel('Type of action')
plt.ylabel('No. of responses')
plt.title('Activists\' reports of which actions their partners mentioned taking', fontweight="bold")
# Ticks sit midway between the two bars of each pair.
plt.xticks(ind + width / 2, action_tick_labels)
plt.show()

#------------
# Barriers
#------------
#%% Make a barplot of reported barriers

# Barrier tallies from the survey after conversation 2 (org_2) ...
barrier_time_post2 = reported_action_counter(org_2, 'barrier_time')
barrier_overwhelm_climate_post2 = reported_action_counter(org_2, 'barrier_overwhelm_climate')
barrier_overwhelm_action_post2 = reported_action_counter(org_2, 'barrier_overwhelm_action')
barrier_overwhelm_other_post2 = reported_action_counter(org_2, 'barrier_overwhelm_other')
barrier_alone_post2 = reported_action_counter(org_2, 'barrier_alone')
barrier_fear_post2 = reported_action_counter(org_2, 'barrier_fear')
barrier_other_post2 = reported_action_counter(org_2, 'barrier_other')

# ... and from the survey after conversation 3 (org_3).
barrier_time_post3 = reported_action_counter(org_3, 'barrier_time')
barrier_overwhelm_climate_post3 = reported_action_counter(org_3, 'barrier_overwhelm_climate')
barrier_overwhelm_action_post3 = reported_action_counter(org_3, 'barrier_overwhelm_action')
barrier_overwhelm_other_post3 = reported_action_counter(org_3, 'barrier_overwhelm_other')
barrier_alone_post3 = reported_action_counter(org_3, 'barrier_alone')
barrier_fear_post3 = reported_action_counter(org_3, 'barrier_fear')
barrier_other_post3 = reported_action_counter(org_3, 'barrier_other')

# Row labels, in the same order as the tallies listed below.
barriers_indexes = [
    "Lack of free time",
    "Feeling overwhelmed by the climate crisis",
    "Feeling overwhelmed by the amount of possible climate actions",
    "Feeling overwhelmed with other things",
    "Not having anyone to take action with",
    "Fearing repercussions for having taken climate action",
    "Other:",
]

barriers_dict = {
    'Between first and second conversations': [
        barrier_time_post2,
        barrier_overwhelm_climate_post2,
        barrier_overwhelm_action_post2,
        barrier_overwhelm_other_post2,
        barrier_alone_post2,
        barrier_fear_post2,
        barrier_other_post2,
    ],
    'Between second and third conversations': [
        barrier_time_post3,
        barrier_overwhelm_climate_post3,
        barrier_overwhelm_action_post3,
        barrier_overwhelm_other_post3,
        barrier_alone_post3,
        barrier_fear_post3,
        barrier_other_post3,
    ],
}

barriers = pd.DataFrame(barriers_dict, index=barriers_indexes)

#Make a barplot showing reports of barriers after conversation2 versus conversation3
# Numbers of pairs of bars you want
# Grouped bar chart: barrier mentions after conversation 2 vs. conversation 3.
# Number of pairs of bars (one pair per barrier type)
N = 7
# Heights of the first series (survey after conversation 2)
blue_bar = (barrier_time_post2, barrier_overwhelm_climate_post2, barrier_overwhelm_action_post2, barrier_overwhelm_other_post2, barrier_alone_post2, barrier_fear_post2, barrier_other_post2)
# Heights of the second series (survey after conversation 3)
orange_bar = (barrier_time_post3, barrier_overwhelm_climate_post3, barrier_overwhelm_action_post3, barrier_overwhelm_other_post3, barrier_alone_post3, barrier_fear_post3, barrier_other_post3)
# Position of bars on x-axis
ind = np.arange(N)
# Large canvas so the seven long tick labels have room
plt.figure(figsize=(30,15))
# Width of a bar
width = 0.3
# Plotting
plt.bar(ind, blue_bar , width, label='Between first and second conversations')
plt.bar(ind + width, orange_bar, width, label='Between second and third conversations')
# Both series carry labels, but labels are only displayed by a legend; the
# parallel action-report chart draws one, so do the same here.
plt.legend(loc="upper left")
plt.xlabel('Type of barrier')
plt.ylabel('No. of responses')
plt.title('Activists\' reports of barriers their partners mentioned facing', fontweight = "bold")
# First argument - positions at which ticks should be placed (midway between
# the two bars of each pair); second argument - the labels for those positions
plt.xticks(ind + width / 2, barriers_indexes)
#plt.show()

#--------------------------
# Demographic factors
#--------------------------

#%% Print interest in various topics
#56 interested in solutions, 54 interested in general, 46 interested in action, 43 interested in CJ, 
# Tally the partners' answers to the four "interest" columns of the
# pre-conversation survey; value_counts() gives one count per distinct answer.
interest_cj = par_pre1['interest_cj'].value_counts()
print("Interested in climate justice:", interest_cj)
interest_action = par_pre1['interest_action'].value_counts()
print("Interested in climate action:", interest_action)
interest_general = par_pre1['interest_general'].value_counts()
print("Interested in general discussion:", interest_general)
interest_solutions = par_pre1['interest_solutions'].value_counts()
print("Interested in solutions:", interest_solutions)

#%% Make a list of the organizers linked to the partners in par_res (e.g., those whose pairs we have complete data for)
# Invert pairs (keyed by organizer ID, valued by partner ID) once, so each
# partner lookup is a dictionary hit instead of a fresh scan of the values.
# setdefault keeps the first organizer listed for a partner, which is what a
# positional search through pairs.values() would return.
org_by_partner = {}
for organizer_id, partner_id in pairs.items():
    org_by_partner.setdefault(partner_id, organizer_id)

# Organizers whose partner appears in par_res; a partner ID that is absent
# from pairs raises KeyError.
org_complete = [org_by_partner[par_id] for par_id in par_res["ID"].values.tolist()]

#%% Filter org_pre1 by the IDs in org_complete

# Keep only the organizer pre-survey rows whose ID is in org_complete.
org_has_complete_pair = org_pre1['ID'].isin(org_complete)
org_pre1_complete = org_pre1[org_has_complete_pair]

#%% Subset the partner pre-1 df to include only respondents who are in results
result_ids = results["ID"].values.tolist()
par_in_results = par_pre1["ID"].isin(result_ids)
par_pre1_complete = par_pre1[par_in_results]
par_pre1_complete

#%% Plot closeness of the participants
# Count how many organizers (complete pairs only) gave each closeness answer
# and draw the counts as a bar chart.
closeness_freq = org_pre1_complete['closeness'].value_counts()
print(closeness_freq)
fig, ax = plt.subplots()
closeness_freq.plot(ax=ax, kind='bar')
plt.title('Closeness of relationships between partners and activists', fontweight = "bold")
plt.xlabel('Closeness', fontweight ="bold")
# NOTE(review): value_counts() orders the bars by descending frequency, while
# these tick labels are hard-coded in a fixed order -- confirm that the label
# at each position really matches the category plotted there.
ax.set_xticks([0, 1, 2, 3, 4], labels=["Very close", "Fairly close", "Somewhat close", "Not very close", "Not at all close"], rotation=45, ha='right')
plt.ylabel('No. of pairs', fontweight ="bold")
#plt.show()
#%%
# Display the raw counts in the interactive window.
closeness_freq

#%% Plot relationships -- mostly friends and family members
# Relationship type reported by each organizer in a complete pair.
relationship = org_pre1_complete['relationship'].tolist()
# Not referenced again in the visible part of this file.
rel_options = ['family member', 'co-worker', 'friend', 'acquaintance', 'neighbor', 'teacher', 'student', 'significant other']


#Plot relationship
fig, ax = plt.subplots()
relationship = pd.Series(relationship)
relationship.value_counts().plot(ax=ax, kind='bar')
plt.title('Relationships between partners and activists', fontweight = "bold")
plt.xlabel('Relationship', fontweight ="bold")
# NOTE(review): bars come out in descending-frequency order from
# value_counts(); these five labels are hard-coded, so confirm they match the
# plotted categories (and that exactly five categories occur in the data).
ax.set_xticks([0, 1, 2, 3, 4], labels = ["Friends", "Family members", "Significant others", "Acquaintances", "Co-workers"], rotation=45, ha='right')
plt.ylabel('No. of pairs', fontweight ="bold")
#plt.show()

#%%
# Display the raw counts in the interactive window.
relationship.value_counts()

# %% Partner race - mostly white, or multiracial, Black, or Asian
# Frequency of each race answer, one cell per group
# (par_res, org_pre1_complete, ctrl_res, ctrl_new).
par_race_count = par_res['race'].value_counts()
print("Partner race:\n", par_race_count)
# Bare expression: shows the raw column when the cell is run interactively.
par_res['race']

#%%
#Organizer race - mostly white, Black, or Multiracial
org_race_count = org_pre1_complete['race'].value_counts()
print("Organizer race:\n", org_race_count)

#%%
#Control 1 race - white (26), Asian (4), Black (4), multi (4), other (3), Latine (1)
ctrl_race_count = ctrl_res['race'].value_counts()
print("Control 1 race:\n", ctrl_race_count)

#%%
#Control 2 race - (no result recorded yet)
ctrl_new_race_count = ctrl_new['race'].value_counts()
print("Control 2 race:\n", ctrl_new_race_count)
#%%
#Partner gender - female (34), male (22), nonbinary (5)
# Frequency of each gender answer, one cell per group.
par_gender_count = par_res['gender'].value_counts()
print("Partner gender:\n", par_gender_count)

#%%
# organizer gender - female (48), male (14), nonbinary (4), prefer not to answer (2)
org_gender_count = org_pre1_complete['gender'].value_counts()
print("Organizer gender:\n", org_gender_count)

#%%
# control 1 gender - female (25), male (16), non-binary (1)
ctrl_gender_count = ctrl_res['gender'].value_counts()
print("Control 1 gender:\n", ctrl_gender_count)

#%%
# control 2 gender - (no result recorded yet)
ctrl_new_gender_count = ctrl_new['gender'].value_counts()
print("Control 2 gender:\n", ctrl_new_gender_count)

# NOTE(review): hard-coded ratio with no label; looks like a leftover scratch
# calculation -- confirm whether it is still needed.
print(15/40)
#%%
#   partner class - 3 experiencing lots of financial hardship, 1 experiencing some financial hardship, 42 not wealthy but not experiencing hardship either, 1 very wealthy
# Frequency of each financial-situation answer, one cell per group.
# The partner and control-1 frames use a 'finances' column, while the
# organizer and control-2 frames use 'class'.
par_class_count = par_res['finances'].value_counts()
print("Partner class:\n", par_class_count)

#%%
# organizer class - 4 experiencing a lot of financial hardship, 21 experiencing some financial hardship, 40 neither wealthy nor poor, 1 very wealthy
org_class_count = org_pre1_complete['class'].value_counts()
print("Organizer class:\n", org_class_count)

#%%
# control 1 class - 2 experiencing a lot of financial hardship, 9 experiencing some financial hardship, 27 neither wealthy nor poor, 4 very wealthy
ctrl_class_count = ctrl_res['finances'].value_counts()
print("Control 1 class:\n", ctrl_class_count)

#%%
# control 2 class - (no result recorded yet)
ctrl_new_class_count = ctrl_new['class'].value_counts()
print("Control 2 class:\n", ctrl_new_class_count)

#%%
# partner political orientation - 32 'Neither progressive nor conservative', 22 'very progressive', 4 other, 3 prefer not to answer
# Frequency of each political-orientation answer, one cell per group.
par_politics_count = par_res['politics'].value_counts()
print("Partner political affiliation:\n", par_politics_count)

#%%
# organizer political orientation - 41 'Very progressive', 22 'Neither progressive nor conservative', 2 other, 2 PNA
org_politics_count = org_pre1_complete['politics'].value_counts()
print("Organizer political affiliation:\n", org_politics_count)

#%%
# control 1 political orientation - 10 'Very progressive', 15 'Somewhat progressive', 16 'Neither progressive nor conservative', 1 other
ctrl_politics_count = ctrl_res['politics'].value_counts()
print("Control 1 political affiliation:\n", ctrl_politics_count)

#%%
# control 2 political orientation - (no result recorded yet)
ctrl_new_politics_count = ctrl_new['politics'].value_counts()
print("Control 2 political affiliation:\n", ctrl_new_politics_count)
#%%
# partner religion - mostly Christian, agnostic, or atheist
# Frequency of each religion answer, one cell per group.
par_religion_count = par_res['religion'].value_counts()
print("Partner religion:\n", par_religion_count)

#%%
# organizer religion - mostly agnostic, Atheist, or Christian
org_religion_count = org_pre1_complete['religion'].value_counts()
print("Organizer religion:\n", org_religion_count)

#%%
# Control 1 religion - mostly Christian, some Atheist or Agnostic
# NOTE(review): read from ctrl_1_complete, whereas the other control-1
# demographic counts read from ctrl_res -- confirm this is deliberate.
ctrl_religion_count = ctrl_1_complete['religion'].value_counts()
print("Control 1 religion:\n", ctrl_religion_count)

#%%
# Control 2 religion - (no result recorded yet)
ctrl_new_religion_count = ctrl_new['religion'].value_counts()
print("Control 2 religion:\n", ctrl_new_religion_count)
#%% partner age - mostly 18-24, 25-34, 35-44
# Frequency of each age answer, one cell per group.
par_age_count = par_res['age'].value_counts()
print("Partner age:\n", par_age_count)

#%% organizer age - mostly 18-24, 25-34, 35-44
org_age_count = org_pre1_complete['age'].value_counts()
print("Organizer age:\n", org_age_count)

#%%
#ctrl 1 age - mostly 18-24, 25-34, 35-44
ctrl_age_count = ctrl_res['age'].value_counts()
print("Control 1 age:\n", ctrl_age_count)

#%% ctrl 2 age - (no result recorded yet)
ctrl_new_age_count = ctrl_new['age'].value_counts()
print("Control 2 age:\n", ctrl_new_age_count)
#%%
#partner location - Mostly NY, Arizona, NC - 27 states total!
# Frequency of each location answer, one cell per group.
# NOTE(review): the partner count reads the full par_pre1 frame rather than a
# completed-pairs subset such as par_pre1_complete -- confirm that is intended.
par_location_count = par_pre1['location'].value_counts()
print("Partner location:\n", par_location_count)

#%%
#organizer location - Mostly NY, AZ, AL, CA - 28 states
org_location_count = org_pre1_complete['location'].value_counts()
print("Organizer location:\n", org_location_count)

#%% control 1 location - Mostly Michigan, NY, PA, CA - 15 states
ctrl_location_count = ctrl_1_complete['location'].value_counts()
print("Control 1 location:\n", ctrl_location_count)

#%%
#control 2 location - (no result recorded yet)
ctrl_new_location_count = ctrl_new['location'].value_counts()
print("Control 2 location:\n", ctrl_new_location_count)

#%% Organizer affectedness

from collections import Counter

# Organizers' affectedness answers with missing entries (NaN) removed.
org_affectedness = [
    answer
    for answer in org_pre1_complete['affectedness'].tolist()
    if str(answer) != 'nan'
]

# Collapse every answer that is neither 'No' nor contains 'not sure' into
# 'Yes'; the remaining answers are kept verbatim.
org_affectedness_new = [
    'Yes' if (answer != 'No' and 'not sure' not in answer) else answer
    for answer in org_affectedness
]

Counter(org_affectedness_new)

#%% Partner affectedness
# Frequency of each affectedness answer among partners (par_res).
par_affectedness = par_res['affectedness'].value_counts()
print("Partner affectedness:\n", par_affectedness)
#%% Ctrl 1 affectedness
ctrl_affectedness = ctrl_res['affectedness'].value_counts()
print("Control 1 affectedness:\n", ctrl_affectedness)
#%% Ctrl new affectedness
# Read from new_ctrl_res, whereas the other control-2 demographic counts in
# this file read from ctrl_new.
ctrl_new_affectedness = new_ctrl_res['affectedness'].value_counts()
print("Control new affectedness:\n", ctrl_new_affectedness)

#Compare demographics of treatment vs control
#%% Finances - treatment v control (t-test) - not significantly different

# Convert the 'class' answers to integers for each group. Only entries equal
# to one of the strings '0'-'4' are kept; anything else (other text, NaN, or
# values stored as numbers rather than strings) is dropped.
ctrl_class = [int(x) for x in ctrl_1['class'] if x in ['0', '1', '2', '3', '4']]
ctrl_new_class = [int(x) for x in ctrl_new['class'] if x in ['0', '1', '2', '3', '4']]
par_class = [int(x) for x in par_pre1['class'] if x in ['0', '1', '2', '3', '4']]

print("Average financial status - treatment group:", mean(par_class),
      "\nAverage financial status - control group:", mean(ctrl_class),
      "\nAverage financial status - new control group:", mean(ctrl_new_class))

# Pairwise independent-samples t-tests; equal_var=True selects the standard
# pooled-variance (Student's) test.
#%% Finances - treatment v ctrl1 (t-test) - No significant difference
stats.ttest_ind(a=ctrl_class, b=par_class, equal_var=True)
#%% Finances - ctrl1 vs ctrl new (t-test)
stats.ttest_ind(a=ctrl_class, b=ctrl_new_class, equal_var=True)
#%% Finances - treatment vs ctrl new (t-test)
stats.ttest_ind(a=par_class, b=ctrl_new_class, equal_var=True)
#%% Finances by group (anova) - The three groups' class is significantly different so far
# One-way ANOVA across all three groups at once.
stats.f_oneway(ctrl_class,
               ctrl_new_class,
               par_class)
#%%
def chisq(df, group_col, res_col):
    """Print a chi-squared test of independence between two columns of ``df``.

    Cross-tabulates ``df[group_col]`` against ``df[res_col]``, prints the
    contingency table and the p-value, and then reports the outcome at a
    0.05 significance threshold.  Nothing is returned.
    """
    significance_level = 0.05

    # Contingency table: rows are group_col values, columns res_col values.
    contingency = pd.crosstab(df[group_col], df[res_col])
    print(contingency)

    # chi2_contingency yields (statistic, p-value, dof, expected counts).
    chi2_stat, p_value, deg_freedom, _expected = chi2_contingency(contingency)

    print("p value is " + str(p_value))
    rejected = p_value <= significance_level
    if not rejected:
        print('Independent (H0 holds true)')
        return
    print('Dependent (reject H0). Degrees of freedom:', deg_freedom, 'Statistic:', chi2_stat)
#%%
# Pairwise subsets of results, each built by excluding one group value:
#   ctrls     - every row whose group is not "treatment"
#   par_ctrl1 - every row whose group is not "control_new"
#   par_ctrl2 - every row whose group is not "control"
ctrls = results.query('group != "treatment"')
par_ctrl1 = results.query('group != "control_new"')
par_ctrl2 = results.query('group != "control"')

#%% Finances by group (chi-sq) - The three groups' class is significantly different
#Exclude 'prefer not to answer'
res_fin = results.query('finances != "Prefer not to answer"')

# Each cell below runs chisq() on one demographic column, first across all
# groups and then for individual pairs of groups.
#%%
chisq(res_fin, 'group', 'finances')
#%% Political views by group - chi-squared test
chisq(results, 'group', 'politics')
#%%Political views, treatment v ctrl1 - no significant difference
chisq(par_ctrl1, 'group', 'politics')
#%%Political views, treatment v ctrl2
chisq(par_ctrl2, 'group', 'politics')
#%%Political views, ctrl1 v ctrl2
chisq(ctrls, 'group', 'politics')
#%% Age by group - chisq - significant!
chisq(results, 'group', 'age')
#%% Age, treatment v. ctrl1 - chisq - no significant difference
chisq(par_ctrl1, 'group', 'age')
#%% Age, treatment v. ctrl2 - chisq
chisq(par_ctrl2, 'group', 'age')
#%% Race by group - chisq
chisq(results, 'group', 'race')
#%% Race, treatment v ctrl1 - chisq - no significant difference
chisq(par_ctrl1, 'group', 'race')
#%% Race, treatment v ctrl2 - chisq
chisq(par_ctrl2, 'group', 'race')
#%% Gender by group - chisq
chisq(results, 'group', 'gender')
#%% Gender, treatment v ctrl1 - chisq - no significant difference
chisq(par_ctrl1, 'group', 'gender')
#%% Gender, treatment v ctrl2 - chisq
chisq(par_ctrl2, 'group', 'gender')
#%% Religion by group - chisq
chisq(results, 'group', 'religion')
#%% Religion, treatment v ctrl1 - chisq - no significant difference
chisq(par_ctrl1, 'group', 'religion')
#%% Religion, treatment v ctrl2 - chisq - no significant difference
chisq(par_ctrl2, 'group', 'religion')
#%% Affectedness by group - chisq - no significant difference
chisq(results, 'group', 'affectedness')

#%% Prior knowledge (t-test) - treatment v control 1 - no significant difference
#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
# Baseline knowledge ratings compared between groups: print the two
# variances and means, then run a pooled-variance t-test (equal_var=True).
print(np.var(ctrl_1['knowledge_1']), np.var(par_pre1['knowledge_pre1']))

print("Treatment - mean knowledge rating:", mean(par_pre1['knowledge_pre1']))
print("Control - mean knowledge rating:", mean(ctrl_1['knowledge_1']))

#Does prior knowledge differ across the treatment and control 1?
stats.ttest_ind(a=ctrl_1['knowledge_1'], b=par_pre1['knowledge_pre1'], equal_var=True)

#%% Prior knowledge (t-test) - treatment v control 2
#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(ctrl_new['knowledge']), np.var(par_pre1['knowledge_pre1']))

print("Treatment - mean knowledge rating:", mean(par_pre1['knowledge_pre1']))
print("Control 2 - mean knowledge rating:", mean(ctrl_new['knowledge']))

#Does prior knowledge differ across the treatment and control 2?
stats.ttest_ind(a=ctrl_new['knowledge'], b=par_pre1['knowledge_pre1'], equal_var=True)
#%% Prior knowledge (t-test) - control v control 2
#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(ctrl_new['knowledge']), np.var(ctrl_1['knowledge_1']))

print("Control 1 - mean knowledge rating:", mean(ctrl_1['knowledge_1']))
print("Control 2 - mean knowledge rating:", mean(ctrl_new['knowledge']))

#Does prior knowledge differ across the control groups?
stats.ttest_ind(a=ctrl_new['knowledge'], b=ctrl_1['knowledge_1'], equal_var=True)
#%% Prior knowledge by group (one-way anova)
# Per-group values of results['knowledge_prior'] with NaN entries removed
# (str(x) != "nan" filters them out) before the one-way ANOVA.
par_prior_knowledge = [x for x in results['knowledge_prior'][results['group'] == 'treatment'] if str(x) != "nan"]
ctrl_prior_knowledge = [x for x in results['knowledge_prior'][results['group'] == 'control'] if str(x) != "nan"]
ctrl_new_prior_knowledge = [x for x in results['knowledge_prior'][results['group'] == 'control_new'] if str(x) != "nan"]

stats.f_oneway(par_prior_knowledge,
               ctrl_prior_knowledge,
               ctrl_new_prior_knowledge)
#%% Prior concern (t-test) - treatment v control 1 - no significant difference
#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
# Baseline concern ratings compared between groups: print the two variances
# and means, then run a pooled-variance t-test (equal_var=True).
print(np.var(ctrl_1['concern_1']), np.var(par_pre1['concern_pre1']))

print("Treatment - mean concern rating:", mean(par_pre1['concern_pre1']))
print("Control - mean concern rating:", mean(ctrl_1['concern_1']))

#Does prior concern differ across the treatment and control 1?
stats.ttest_ind(a=ctrl_1['concern_1'], b=par_pre1['concern_pre1'], equal_var=True)

#%% Prior concern (t-test) - treatment v control 2
#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(ctrl_new['concern']), np.var(par_pre1['concern_pre1']))

print("Treatment - mean concern rating:", mean(par_pre1['concern_pre1']))
print("Control 2 - mean concern rating:", mean(ctrl_new['concern']))

#Does prior concern differ across the treatment and control 2?
stats.ttest_ind(a=ctrl_new['concern'], b=par_pre1['concern_pre1'], equal_var=True)

#%% Prior concern (t-test) - control 1 v control 2
#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(ctrl_new['concern']), np.var(ctrl_1['concern_1']))

print("Control 1 - mean concern rating:", mean(ctrl_1['concern_1']))
print("Control 2 - mean concern rating:", mean(ctrl_new['concern']))

#Does prior concern differ across the control groups?
stats.ttest_ind(a=ctrl_new['concern'], b=ctrl_1['concern_1'], equal_var=True)

#%% Prior concern by group (one-way anova)
# Per-group values of results['concern_prior'] with NaN entries removed
# before the one-way ANOVA.
par_prior_concern = [x for x in results['concern_prior'][results['group'] == 'treatment'] if str(x) != "nan"]
ctrl_prior_concern = [x for x in results['concern_prior'][results['group'] == 'control'] if str(x) != "nan"]
ctrl_new_prior_concern = [x for x in results['concern_prior'][results['group'] == 'control_new'] if str(x) != "nan"]

stats.f_oneway(par_prior_concern,
               ctrl_prior_concern,
               ctrl_new_prior_concern)

#%% Prior efficacy (t-test) - partners vs control 1 - no significant difference

# Baseline perceived-efficacy ratings, converted to int. int(x) raises on a
# missing (NaN) entry, so these columns are presumably complete -- verify.
eff_ctrl = [int(x) for x in ctrl_1['perceived_efficacy_1']]
eff_par = [int(x) for x in par_pre1['perceived_efficacy_pre1']]

#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(eff_ctrl), np.var(eff_par))

print("Treatment - mean perceived efficacy rating:", mean(eff_par))
print("Control - mean perceived efficacy rating:", mean(eff_ctrl))

#Does prior perceived efficacy differ across the treatment and control 1?
stats.ttest_ind(a=eff_ctrl, b=eff_par, equal_var=True)

#%% Prior efficacy (t-test) - partners vs control 2
eff_ctrl_new = [int(x) for x in ctrl_new['perceived_efficacy']]

#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(eff_ctrl_new), np.var(eff_par))

print("Treatment - mean perceived efficacy rating:", mean(eff_par))
print("Control 2 - mean perceived efficacy rating:", mean(eff_ctrl_new))

#Does prior perceived efficacy differ across the treatment and control 2?
stats.ttest_ind(a=eff_ctrl_new, b=eff_par, equal_var=True)
#%% Prior efficacy (t-test) - control 1 vs control 2

#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(eff_ctrl_new), np.var(eff_ctrl))

print("Control 1 - mean perceived efficacy rating:", mean(eff_ctrl))
print("Control 2 - mean perceived efficacy rating:", mean(eff_ctrl_new))

#Does prior perceived efficacy differ across the control groups?
stats.ttest_ind(a=eff_ctrl_new, b=eff_ctrl, equal_var=True)

#%% Prior efficacy by group (one-way anova)
# Per-group values of results['efficacy_prior'] with NaN entries removed
# before the one-way ANOVA.
par_prior_eff = [x for x in results['efficacy_prior'][results['group'] == 'treatment'] if str(x) != "nan"]
ctrl_prior_eff = [x for x in results['efficacy_prior'][results['group'] == 'control'] if str(x) != "nan"]
ctrl_new_prior_eff = [x for x in results['efficacy_prior'][results['group'] == 'control_new'] if str(x) != "nan"]

stats.f_oneway(par_prior_eff,
               ctrl_prior_eff,
               ctrl_new_prior_eff)

#%% Prior likelihood of action (t-test) - partners vs control 1 - no significant difference

#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
# Baseline likelihood-of-action ratings compared between groups.
# The control-1 mean below is taken with np.nanmean, which suggests
# ctrl_1['likelihood_1'] can contain missing ratings; every statistic in this
# section that touches that column therefore ignores NaN in the same way
# instead of silently returning nan.
print(np.var(ctrl_1['likelihood_1']), np.var(par_pre1['likelihood_pre1']))

print("Treatment - mean likelihood rating:", mean(par_pre1['likelihood_pre1']))
print("Control - mean likelihood rating:", np.nanmean(ctrl_1['likelihood_1']))

#%%
#Does prior likelihood of taking action differ across the treatment and control 1?
# nan_policy='omit' drops missing ratings rather than propagating them.
stats.ttest_ind(a=ctrl_1['likelihood_1'], b=par_pre1['likelihood_pre1'], equal_var=True, nan_policy='omit')

#%% Prior likelihood of action (t-test) - partners vs control 2

#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(ctrl_new['likelihood']), np.var(par_pre1['likelihood_pre1']))

print("Treatment - mean likelihood rating:", mean(par_pre1['likelihood_pre1']))
print("Control 2 - mean likelihood rating:", mean(ctrl_new['likelihood']))

#Does prior likelihood of taking action differ across the treatment and control 2?
stats.ttest_ind(a=ctrl_new['likelihood'], b=par_pre1['likelihood_pre1'], equal_var=True)

#%% Prior likelihood of action (t-test) - control 1 vs control 2

#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(ctrl_new['likelihood']), np.var(ctrl_1['likelihood_1']))

# np.nanmean for control 1, matching the treatment-vs-control-1 cell above.
print("Control 1 - mean likelihood rating:", np.nanmean(ctrl_1['likelihood_1']))
print("Control 2 - mean likelihood rating:", mean(ctrl_new['likelihood']))

#Does prior likelihood of taking action differ across the control groups?
stats.ttest_ind(a=ctrl_new['likelihood'], b=ctrl_1['likelihood_1'], equal_var=True, nan_policy='omit')

#%% Prior likelihood by group (one-way anova) - Significant difference
# Per-group values of results['likelihood_prior'] with NaN entries removed
# (str(x) != "nan" filters them out), compared with a one-way ANOVA.
par_prior_lik = [x for x in results['likelihood_prior'][results['group'] == 'treatment'] if str(x) != "nan"]
ctrl_prior_lik = [x for x in results['likelihood_prior'][results['group'] == 'control'] if str(x) != "nan"]
ctrl_new_prior_lik = [x for x in results['likelihood_prior'][results['group'] == 'control_new'] if str(x) != "nan"]

stats.f_oneway(par_prior_lik,
               ctrl_prior_lik,
               ctrl_new_prior_lik)
#%% Prior likelihood of collective action (t-test) - partners vs control 1 - no significant difference
# Baseline likelihood-of-collective-action ratings, converted to int
# (int(x) raises on a missing entry).
lik_col_ctrl = [int(x) for x in ctrl_1['likelihood_collective_1']]
lik_col_par = [int(x) for x in par_pre1['likelihood_collective_pre1']]

#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(lik_col_ctrl), np.var(lik_col_par))

print("Treatment - mean likelihood collective rating:", mean(lik_col_par))
print("Control - mean likelihood collective rating:", mean(lik_col_ctrl))

#Does prior likelihood of collective action differ across the treatment and control 1?
stats.ttest_ind(a=lik_col_ctrl, b=lik_col_par, equal_var=True)

#%% Prior likelihood of collective action (t-test) - partners vs control 2
# NOTE(review): this reads ctrl_new['likelihood'] -- the same column used for
# the general likelihood comparison -- while the other two groups use their
# "likelihood_collective" columns. Looks like a copy-paste slip; confirm which
# ctrl_new column holds the collective-action rating.
lik_col_ctrl_new = [int(x) for x in ctrl_new['likelihood']]

#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(lik_col_ctrl_new), np.var(lik_col_par))

print("Treatment - mean likelihood collective rating:", mean(lik_col_par))
print("Control 2 - mean likelihood collective rating:", mean(lik_col_ctrl_new))

#Does prior likelihood of collective action differ across the treatment and control 2?
stats.ttest_ind(a=lik_col_ctrl_new, b=lik_col_par, equal_var=True)

#%% Prior likelihood of collective action (t-test) - control 1 vs control 2

#Check that the ratio of variance is less than 4:1 so we can proceed with t-test
print(np.var(lik_col_ctrl_new), np.var(lik_col_ctrl))

print("Control 1 - mean likelihood collective rating:", mean(lik_col_ctrl))
print("Control 2 - mean likelihood collective rating:", mean(lik_col_ctrl_new))

#Does prior likelihood of collective action differ across the control groups?
stats.ttest_ind(a=lik_col_ctrl_new, b=lik_col_ctrl, equal_var=True)

#%% Total actions, treatment v control 1 (t-test) No difference!
# Pairwise pooled-variance t-tests on the aggregated action score, read from
# each group's own survey frame (par_followup, ctrl_followup, ctrl_new).
stats.ttest_ind(a=ctrl_followup['aggregated_action_score'], b=par_followup['aggregated_action_score'], equal_var=True)

#%% Total actions, treatment v control 2 (t-test)
stats.ttest_ind(a=ctrl_new['aggregated_action_score'], b=par_followup['aggregated_action_score'], equal_var=True)

#%% Total actions, control 1 v control 2 (t-test)
stats.ttest_ind(a=ctrl_new['aggregated_action_score'], b=ctrl_followup['aggregated_action_score'], equal_var=True)

#%% Total actions by group (one-way anova)
# The ANOVAs read the combined results frame, split by its 'group' column.
stats.f_oneway(results["tot_action"][results["group"]=="treatment"],
               results["tot_action"][results["group"]=="control"],
               results["tot_action"][results["group"]=="control_new"])
#%% Collective actions, treatment v control 1 (t-test) No difference!
stats.ttest_ind(a=ctrl_followup['collective_action_advocacy'], b=par_followup['collective_action_advocacy'], equal_var=True)

#%% Collective actions, treatment v control 2 (t-test) 
stats.ttest_ind(a=ctrl_new['collective_action_advocacy'], b=par_followup['collective_action_advocacy'], equal_var=True)

#%% Collective actions, control 1 v control 2 (t-test)
stats.ttest_ind(a=ctrl_followup['collective_action_advocacy'], b=ctrl_new['collective_action_advocacy'], equal_var=True)

#%% Collective action by group (one-way anova)
stats.f_oneway(results["col_action"][results["group"]=="treatment"],
               results["col_action"][results["group"]=="control"],
               results["col_action"][results["group"]=="control_new"])

#%% Lifestyle changes by group (one-way anova)
stats.f_oneway(results["lifestyle"][results["group"]=="treatment"],
               results["lifestyle"][results["group"]=="control"],
               results["lifestyle"][results["group"]=="control_new"])

#%% Self-education by group (one-way anova)
stats.f_oneway(results["self_ed"][results["group"]=="treatment"],
               results["self_ed"][results["group"]=="control"],
               results["self_ed"][results["group"]=="control_new"])


#%% Make a bar plot comparing average rates of collective action and total action by group
# Mean total and collective action per participant in each group.
# The two columns are selected with a list: subscripting a GroupBy with a
# bare tuple of names was deprecated in pandas 1.0 and raises in pandas 2.x.
act_by_grp = results.groupby("group")[["tot_action", "col_action"]].mean()
# Transpose so rows are the two measures and columns are the groups, then fix
# the column order used by the plot.
act_by_grp = act_by_grp.transpose()
act_by_grp = act_by_grp.loc[:,['treatment','control', 'control_new']]
# Human-readable row labels, and column labels that embed each group's size
# (the same strings are reused as legend labels when plotting).
act_by_grp = act_by_grp.rename(index={"group": "Group", "tot_action": "Total action", "col_action": "Collective action"})
act_by_grp = act_by_grp.rename(columns={'control':f'Survey-only control (n={len(ctrl_1_complete["ID"])})', 'treatment': f'Treatment (n={len(par_followup["ID"])})', 'control_new': f'No-intervention control (n={len(ctrl_new["ID"])})'})
act_by_grp

#%% Calculate 95% confidence intervals for each mean

#Define a function to calculate the half-width of a 95% confidence interval for the mean of a list
import math

def ci_range(list):
    """Return the half-width of a 95% confidence interval for a mean.

    Computes ``1.96 * std / sqrt(n)`` over the non-missing values, so the
    interval is ``mean +/- ci_range(values)``.

    Args:
        list: Sequence, array or Series of numbers. NaN entries are ignored.
            The name shadows the builtin but is kept so existing callers
            (including any keyword callers) keep working.

    Returns:
        The half-width as a float, or NaN when no non-missing values remain.
    """
    values = np.asarray(list, dtype=float)
    # Drop missing values before computing both the spread and n. Previously a
    # Series with NaN had its std computed without the NaN while len() still
    # counted them, which made the interval too narrow.
    values = values[~np.isnan(values)]
    if values.size == 0:
        return float('nan')
    # NOTE(review): np.std defaults to the population formula (ddof=0), as the
    # original did; a sample std (ddof=1) is the usual choice for a CI.
    return 1.96 * np.std(values) / math.sqrt(values.size)

# Error-bar sizes for each group, ordered [total action, collective action].
par_ci_ranges = [ci_range(par_res[col]) for col in ('tot_action', 'col_action')]

ctrl_ci_ranges = [ci_range(ctrl_res[col]) for col in ('tot_action', 'col_action')]

new_ctrl_ci_ranges = [ci_range(new_ctrl_res[col]) for col in ('tot_action', 'col_action')]

#%%
plt.figure()

# Number of bar clusters on the x-axis (one per action measure)
N = 2

# Position of the first bar of each cluster, and the width of a single bar
ind = np.arange(N)
width = 0.3

# One bar series per group: x positions, act_by_grp column label (also used as
# the legend entry), and the error-bar sizes.
bar_specs = (
    (ind, f'Treatment (n={len(par_followup["ID"])})', par_ci_ranges),
    (ind + width, f'Survey-only control (n={len(ctrl_1_complete["ID"])})', ctrl_ci_ranges),
    (ind + width + width, f'No-intervention control (n={len(ctrl_new["ID"])})', new_ctrl_ci_ranges),
)
for positions, series_label, errs in bar_specs:
    plt.bar(positions, act_by_grp[series_label], width, label=series_label, yerr=errs)

plt.ylabel("Average number of actions per participant")

# Tick positions (centred on the middle bar of each cluster) and their labels
plt.xticks(ind + width, ['Total action', 'Collective action'], fontsize=7)

# Let matplotlib choose the legend location
plt.legend(loc='best')

#plt.bar(action_types, percent_of_partners)
#plt.bar(action_types, percent_of_ctrl)
#plt.bar(action_types, percent_of_new_ctrl)
plt.savefig('actions_by_group_col_total_average.pdf', bbox_inches='tight')

#%%

#---------------------------------
# Attitudes: treatment vs control
#---------------------------------
#%% Plot concern, likelihood, knowledge, and efficacy by group (pre/post)
from scipy.stats import sem
#%%
measurements = ['Pre', 'Post']


def _without_nan(values):
    """Return the entries of *values* whose string form is not 'nan'."""
    return [v for v in values if str(v) != 'nan']


# Treatment group: scores before the first conversation and after the third
par_concern_prior = _without_nan(par_res['concern_prior'])
par_concern_3 = _without_nan(par_res['concern_3'])
par_likelihood_prior = _without_nan(par_res['likelihood_prior'])
par_likelihood_3 = _without_nan(par_res['likelihood_3'])
par_knowledge_prior = _without_nan(par_res['knowledge_prior'])
par_knowledge_3 = _without_nan(par_res['knowledge_3'])
par_efficacy_prior = _without_nan(par_res['efficacy_prior'])
par_efficacy_3 = _without_nan(par_res['efficacy_3'])

# Survey-only control group: same measures
ctrl_concern_prior = _without_nan(ctrl_res['concern_prior'])
ctrl_concern_3 = _without_nan(ctrl_res['concern_3'])
ctrl_likelihood_prior = _without_nan(ctrl_res['likelihood_prior'])
ctrl_likelihood_3 = _without_nan(ctrl_res['likelihood_3'])
ctrl_knowledge_prior = _without_nan(ctrl_res['knowledge_prior'])
ctrl_knowledge_3 = _without_nan(ctrl_res['knowledge_3'])
ctrl_efficacy_prior = _without_nan(ctrl_res['efficacy_prior'])
ctrl_efficacy_3 = _without_nan(ctrl_res['efficacy_3'])

# [pre, post] means per attitude and group
concern_means_par = [mean(scores) for scores in (par_concern_prior, par_concern_3)]
concern_means_ctrl = [mean(scores) for scores in (ctrl_concern_prior, ctrl_concern_3)]
likelihood_means_par = [mean(scores) for scores in (par_likelihood_prior, par_likelihood_3)]
likelihood_means_ctrl = [mean(scores) for scores in (ctrl_likelihood_prior, ctrl_likelihood_3)]
knowledge_means_par = [mean(scores) for scores in (par_knowledge_prior, par_knowledge_3)]
knowledge_means_ctrl = [mean(scores) for scores in (ctrl_knowledge_prior, ctrl_knowledge_3)]
efficacy_means_par = [mean(scores) for scores in (par_efficacy_prior, par_efficacy_3)]
efficacy_means_ctrl = [mean(scores) for scores in (ctrl_efficacy_prior, ctrl_efficacy_3)]

# [pre, post] 95% confidence interval ranges (Z = 1.96) per attitude and group
concern_cis_par = [ci_range(scores) for scores in (par_concern_prior, par_concern_3)]
concern_cis_ctrl = [ci_range(scores) for scores in (ctrl_concern_prior, ctrl_concern_3)]
likelihood_cis_par = [ci_range(scores) for scores in (par_likelihood_prior, par_likelihood_3)]
likelihood_cis_ctrl = [ci_range(scores) for scores in (ctrl_likelihood_prior, ctrl_likelihood_3)]
knowledge_cis_par = [ci_range(scores) for scores in (par_knowledge_prior, par_knowledge_3)]
knowledge_cis_ctrl = [ci_range(scores) for scores in (ctrl_knowledge_prior, ctrl_knowledge_3)]
efficacy_cis_par = [ci_range(scores) for scores in (par_efficacy_prior, par_efficacy_3)]
efficacy_cis_ctrl = [ci_range(scores) for scores in (ctrl_efficacy_prior, ctrl_efficacy_3)]


# 2x2 grid of subplots sharing one y-axis
fig = plt.figure()
gs = fig.add_gridspec(2,2, hspace=0.4)
axs = gs.subplots(sharey=True)
plt.ylim(ymax = 4, ymin = 0)

def att_plot(x_axis, y_axis, att_means_par, att_means_ctrl, att_sems_par, att_sems_ctrl, att_name, title):
    """Draw pre/post means with error bars for both groups on one subplot.

    Args:
        x_axis: First (row) index into the module-level ``axs`` grid.
        y_axis: Second (column) index into ``axs``.
        att_means_par: Treatment-group values, one per entry of ``measurements``.
        att_means_ctrl: Survey-only-control values, same layout.
        att_sems_par: Error-bar sizes for the treatment series.
        att_sems_ctrl: Error-bar sizes for the control series.
        att_name: Not used by the function body.
        title: Title of the subplot.
    """
    #plt.figure()
    panel = axs[x_axis, y_axis]
    series = (
        (att_means_par, f'Treatment (n={len(par_followup["ID"])})', att_sems_par),
        (att_means_ctrl, f'Survey-only control (n={len(ctrl_1_complete["ID"])})', att_sems_ctrl),
    )
    for group_means, legend_label, group_errs in series:
        panel.errorbar(measurements, group_means, label=legend_label, yerr=group_errs)
    panel.set_title(title)

# One call per attitude; the first two entries pick the subplot.
for panel_args in (
    (1, 0, concern_means_par, concern_means_ctrl, concern_cis_par, concern_cis_ctrl, 'concerned', 'Concern'),
    (0, 1, likelihood_means_par, likelihood_means_ctrl, likelihood_cis_par, likelihood_cis_ctrl, 'likely to act', 'Intention to take action'),
    (0, 0, knowledge_means_par, knowledge_means_ctrl, knowledge_cis_par, knowledge_cis_ctrl, 'knowledgeable', 'Knowledge'),
    (1, 1, efficacy_means_par, efficacy_means_ctrl, efficacy_cis_par, efficacy_cis_ctrl, 'effective', 'Perceived efficacy'),
):
    att_plot(*panel_args)

plt.legend(loc='upper left', fontsize="x-small")
plt.savefig('Attitudes.pdf', bbox_inches='tight')

#%% Print mean attitude changes
# Each figure is (post-3 mean - pre mean) divided by `likert`, times 100.
for change_label, post_scores, prior_scores in (
    ("Percent change in partner knowledge:", par_knowledge_3, par_knowledge_prior),
    ("Percent change in control knowledge:", ctrl_knowledge_3, ctrl_knowledge_prior),
    ("Percent change in partner likelihood:", par_likelihood_3, par_likelihood_prior),
    ("Percent change in control likelihood:", ctrl_likelihood_3, ctrl_likelihood_prior),
    ("Percent change in partner concern:", par_concern_3, par_concern_prior),
    ("Percent change in control concern:", ctrl_concern_3, ctrl_concern_prior),
    ("Percent change in partner efficacy:", par_efficacy_3, par_efficacy_prior),
    ("Percent change in control efficacy:", ctrl_efficacy_3, ctrl_efficacy_prior),
):
    print(change_label, ((mean(post_scores) - mean(prior_scores))/likert*100))

#%% Check if efficacy changed within the treatment group across survey waves

# Wide column name -> label of the survey wave it holds
eff_time_labels = {'efficacy_prior': 'pre', 'efficacy_1':'post-1', 'efficacy_2': 'post-2', 'efficacy_3': 'post-3', 'efficacy_followup': 'followup'}

# Long format: one row per participant per wave
eff_melt = par_res.melt(
    id_vars=['ID', 'group'],
    value_vars=list(eff_time_labels),
    var_name='time',
    value_name='efficacy_score',
)

# Replace the column names in 'time' with the wave labels
eff_melt['time'] = eff_melt['time'].map(eff_time_labels)
eff_melt.head(8)

# Box plot of scores per wave with the individual points overlaid
ax = sns.boxplot(data=eff_melt, x='time', y='efficacy_score', color='#99c2a2')
ax = sns.swarmplot(data=eff_melt, x='time', y='efficacy_score', color='#7d0013')
plt.show()

#%%Check if efficacy changed within treatment group from pre to post-3

# Repeated-measures ANOVA with wave as the within-subject factor
res = pg.rm_anova(data=eff_melt, dv='efficacy_score', within='time', subject='ID', detailed=True)
res
#Efficacy significantly increased over the course of the study in the treatment group (0.016755)

#%% Check if efficacy changed within survey-only control group across survey waves

# Wide column name -> label of the survey wave it holds
eff_ctrl_time_labels = {'efficacy_prior': 'pre', 'efficacy_1':'post-1', 'efficacy_2': 'post-2', 'efficacy_3': 'post-3', 'efficacy_followup': 'followup'}

# Long format: one row per participant per wave
eff_ctrl = ctrl_res.melt(
    id_vars=['ID', 'group'],
    value_vars=list(eff_ctrl_time_labels),
    var_name='time',
    value_name='efficacy_score',
)

# Replace the column names in 'time' with the wave labels
eff_ctrl['time'] = eff_ctrl['time'].map(eff_ctrl_time_labels)
eff_ctrl.head(8)

# Box plot of scores per wave with the individual points overlaid
ax = sns.boxplot(data=eff_ctrl, x='time', y='efficacy_score', color='#99c2a2')
ax = sns.swarmplot(data=eff_ctrl, x='time', y='efficacy_score', color='#7d0013')
plt.show()

# Repeated-measures ANOVA with wave as the within-subject factor
res = pg.rm_anova(data=eff_ctrl, dv='efficacy_score', within='time', subject='ID', detailed=True)
res
#No significant increase in efficacy over time in survey-only control

#%% ANCOVA: CJ concern (treatment v control 1) - not significant
# Compares post-3 climate-justice concern across the levels of 'group' in
# par_ctrl1, with the baseline ("prior") score as the covariate.
print("ANCOVA: CJ concern")
ancova(data=par_ctrl1, dv='cj_concern_3', covar='cj_concern_prior', between='group')

#%% ANCOVA: CJ knowledge (treatment v control 1) - significant!
# Same model for climate-justice knowledge.
print("ANCOVA: CJ knowledge") 
ancova(data=par_ctrl1, dv='cj_knowledge_3', covar='cj_knowledge_prior', between='group')

#%% T-test: donation (treatment v control 1) - not significant
# Independent two-sample t-tests with equal variances assumed.
stats.ttest_ind(par_res['donation'], ctrl_res['donation'], equal_var=True)

#%% T-test: donation (treatment v control 2)
stats.ttest_ind(par_res['donation'], new_ctrl_res['donation'], equal_var=True)

#%% T-test: donation (control 1 v control 2)
stats.ttest_ind(ctrl_res['donation'], new_ctrl_res['donation'], equal_var=True)

#%% One-way anova: Donation by group
# One sample per level of the "group" column, passed in a fixed order.
stats.f_oneway(*(results.loc[results["group"] == grp, "donation"]
                 for grp in ("treatment", "control", "control_new")))

#%% T-test: outreach (treatment v control 1) - not significant
stats.ttest_ind(par_res['outreach'], ctrl_res['outreach'], equal_var=True)

#%% T-test: outreach (treatment v control 2)
stats.ttest_ind(par_res['outreach'], new_ctrl_res['outreach'], equal_var=True)

#%% T-test: outreach (ctrl 1 v control 2)
stats.ttest_ind(ctrl_res['outreach'], new_ctrl_res['outreach'], equal_var=True)

#%% One-way anova: Outreach by group
stats.f_oneway(*(results.loc[results["group"] == grp, "outreach"]
                 for grp in ("treatment", "control", "control_new")))

#%% ANOVA: total action by political views
# One-way ANOVA of tot_action across the levels of 'politics' in `results`;
# the table is printed with three decimals.
print("ANOVA: total action ~ politics")
aov = anova(dv='tot_action', between='politics', data=results, detailed=False)
print_table(aov, floatfmt=".3f")

#%% ANOVA: collective action by political views 

# Same test with col_action as the dependent variable.
print("ANOVA: collective action ~ politics")
aov = anova(dv='col_action', between='politics', data=results, detailed=False)
print_table(aov, floatfmt=".3f")

#No significant difference in collective action across people with different political views

#%% ANCOVA: concern, treatment v control 1 - significant!
# Compares post-3 concern across the levels of 'group' in par_ctrl1, with the
# baseline ("prior") score as the covariate.
print("ANCOVA: Concern")
ancova(data=par_ctrl1, dv='concern_3', covar='concern_prior', between='group')

#%%
# Raw means from the pre-1 and post-3 partner survey sheets.
# NOTE(review): `mean` here is statistics.mean, which does not skip NaN -
# confirm these columns have no missing values.
print(mean(par_pre1['concern_pre1']))
print(mean(par_3['concern_3']))

#%% ANCOVA: knowledge change (treatment v control 1) - significant!
print("ANCOVA: Knowledge")
# Fit on par_ctrl1, the frame every other "treatment v control 1" ANCOVA in
# this file uses; `results` also contains the control_new group. The table is
# printed because it is not the last expression of the cell and would
# otherwise be discarded.
print(ancova(data=par_ctrl1, dv='knowledge_3', covar='knowledge_prior', between='group'))
#Found a significant difference!

# Group means, skipping missing values for both groups
print('Prior knowledge (treatment group):', np.nanmean(par_res['knowledge_prior']))
print('Knowledge post-3 (treatment group):', np.nanmean(par_res['knowledge_3']))
print('Prior knowledge (control group):', np.nanmean(ctrl_res['knowledge_prior']))
print('Knowledge post-3 (control group):', np.nanmean(ctrl_res['knowledge_3']))

# Change in the raw survey-sheet means (post-3 minus first measurement).
# NOTE(review): statistics.mean does not skip NaN - confirm these sheet
# columns have no missing values.
par_change_knowledge = mean(par_3['knowledge_3'])- mean(par_pre1['knowledge_pre1'])
ctrl_change_knowledge = mean(ctrl_3['knowledge_3'])- mean(ctrl_1['knowledge_1'])
print("On average, the treatment group's aggregated knowledge score increased by", par_change_knowledge)
print("On average, the control group's aggregated knowledge score increased by", ctrl_change_knowledge)

#%% ANCOVA: does change in likelihood differ across the treatment and control 1? - significant!
# Compares post-3 likelihood across the levels of 'group' in par_ctrl1, with
# the baseline ("prior") score as the covariate.
print("ANCOVA: likelihood")
ancova(data=par_ctrl1, dv='likelihood_3', covar='likelihood_prior', between='group')

#%%
# Treatment-group means, skipping missing values
print('Prior likelihood (treatment group):', np.nanmean(par_res['likelihood_prior']))
print('Post-3 likelihood (treatment group):', np.nanmean(par_res['likelihood_3']))

# Change in the raw survey-sheet means (post-3 minus first measurement).
# NOTE(review): `mean` is statistics.mean, which does not skip NaN - confirm
# these sheet columns have no missing values.
par_change_lik = mean(par_3['likelihood_3'])- mean(par_pre1['likelihood_pre1'])
ctrl_change_lik = mean(ctrl_3['likelihood_3'])- mean(ctrl_1['likelihood_1'])
print("On average, the treatment group's intention to act increased by", par_change_lik)
print("On average, the control group's intention to act increased by", ctrl_change_lik)
#%% ANCOVA: change in perceived efficacy (treatment v control) - not significant
# Same ANCOVA model for perceived efficacy.
print("ANCOVA: efficacy")
ancova(data=par_ctrl1, dv='efficacy_3', covar='efficacy_prior', between='group')

#---------------------------------
#--Actions: Correlations--
#---------------------------------

#%% Spearmanr: Lifestyle changes don't correlate with collective action
spearmanr(results['lifestyle'], results['col_action'])
#%% Test if intention to take collective action correlates with collective action in treatment and ctrl 1 (Spearman's r) - no correlation
# Keep only rows where both variables are present
par_ctrl1_nona = par_ctrl1.dropna(subset=['col_action', 'lik_col_mean'])

spearmanr(par_ctrl1_nona['lik_col_mean'], par_ctrl1_nona['col_action'])
#%% Test if intention to take action correlates with total action (Spearman's r) in treatment and ctrl 1 - It does
# Keep only rows where both variables are present
par_ctrl1_nona = par_ctrl1.dropna(subset=['tot_action', 'likelihood_mean'])

spearmanr(par_ctrl1_nona['likelihood_mean'], par_ctrl1_nona['tot_action'])

#%% #Did knowledge increase from baseline to post-3 in the treatment group? (t-test) - Yes (p < 0.001)
# Paired test, so keep only participants with both scores
nona = par_res.dropna(subset=['knowledge_prior', 'knowledge_3'])

print(stats.ttest_rel(nona['knowledge_prior'], nona['knowledge_3']))
#%% Did concern increase from baseline to post-3 in the treatment group? (t-test) - No
nona = par_res.dropna(subset=['concern_prior', 'concern_3'])

print(stats.ttest_rel(nona['concern_prior'], nona['concern_3']))
#%% Did likelihood of action increase from baseline to post-3 in the treatment group? (t-test) - yes! (p < 0.001)
nona = par_res.dropna(subset=['likelihood_prior', 'likelihood_3'])

print(stats.ttest_rel(nona['likelihood_prior'], nona['likelihood_3']))
# Describe the same treatment-group rows that entered the test. These means
# were previously taken over `results`, which also holds the control groups.
mean_lik_post = np.nanmean(nona['likelihood_3'])
mean_lik_pre = np.nanmean(nona['likelihood_prior'])
print('Mean likelihood (prior):', mean_lik_pre, 'Mean likelihood (post)', mean_lik_post)
#%% Did perceived efficacy increase from baseline to post-3 in the treatment group? (t-test) - yes! (p = 0.004)
nona = par_res.dropna(subset=['efficacy_prior', 'efficacy_3'])

print(stats.ttest_rel(nona['efficacy_prior'], nona['efficacy_3']))
# Means over the tested treatment-group rows (see the likelihood cell)
mean_eff_post = np.nanmean(nona['efficacy_3'])
mean_eff_pre = np.nanmean(nona['efficacy_prior'])
print('Mean efficacy (prior):', mean_eff_pre, 'Mean efficacy (post)', mean_eff_post)
#%% Did climate justice concern increase from baseline to post-3 in the treatment group? (t-test) - no

# Rows are selected on the change score being present
nona = par_res.dropna(subset=['cj_concern_delta'])
print(stats.ttest_rel(nona['cj_concern_prior'], nona['cj_concern_3']))

#%% Did climate justice knowledge increase from baseline to post-3? (t-test) - yes! (p < 0.001)
nona = par_res.dropna(subset=['cj_knowledge_delta'])

# Descriptives over the tested treatment-group rows rather than all groups
print('Mean CJ knowledge change score:', np.nanmean(nona['cj_knowledge_delta']))
print('Mean CJ knowledge (pre):', np.nanmean(nona['cj_knowledge_prior']))
print('Mean CJ knowledge (post):', np.nanmean(nona['cj_knowledge_3']))
print(stats.ttest_rel(nona['cj_knowledge_prior'], nona['cj_knowledge_3']))
#-----------------------------------------
# Discourse - outcome correlations
#-----------------------------------------
#%% Define a function to automatically run correlation tests
def correlater (var1, var2, df):
    """Run a Spearman correlation and report it only when p < 0.05.

    Args:
        var1: Name of the first column of *df*.
        var2: Name of the second column of *df*.
        df: DataFrame holding both columns.

    Prints the variable pair, the coefficient with ``len(df[var1]) - 2`` in
    parentheses, and the p-value. Prints nothing otherwise. Returns None.
    """
    rho, p = spearmanr(df[var1], df[var2])
    # Written as "not (p < 0.05)" so a NaN p-value is also treated as
    # non-significant.
    if not (p < 0.05):
        return
    dof = len(df[var1]) - 2
    print(var1, 'is significantly correlated with', var2)
    print('rho', '(', dof, '):', rho, 'p:', p)
#%% Choose which independent and dependent variables to test

# Column names treated as predictors in the correlation tests
ind_vars = [
    'invitation_talk',
    'suggestion_talk',
    'challenge_lifestyle_talk',
    'action_talk',
    'action_explaining',
    'action_planning',
    'action_effectiveness',
    'action_support',
    'action_wins',
    'solution_talk',
    'lifestyle_talk',
    'justice_talk',
    'closeness',
    'barrier_talk',
    'politics',
]

#Add compliments, suggestions, affirmations?

# Column names treated as outcomes: action counts first, then change scores
dep_vars = [
    'tot_action',
    'col_action',
    'outreach',
    'self_ed',
    'donation',
    'efficacy_delta',
    'concern_delta',
    'likelihood_delta',
    'cj_concern_delta',
    'cj_knowledge_delta',
]

#%% Test for correlations between all dependent and independent variables
# Loop variables are named ind_var/dep_var so the loop does not overwrite the
# `ind` array of bar positions used by the plotting cell.
for ind_var in ind_vars:
    for dep_var in dep_vars:
        # Drop missing values for this pair only. Re-filtering the same frame
        # inside the inner loop removed rows cumulatively, so each outcome was
        # tested on a sample that depended on the outcomes tested before it.
        results_nona = results.dropna(subset=[ind_var, dep_var])
        correlater(ind_var, dep_var, results_nona)

#%%
# Plot the columns directly. The bare names action_explaining / efficacy_delta
# are only assigned (from par_res) in the SEM cell further down, so this cell
# failed with a NameError unless that cell had already been run.
plt.scatter(par_res['action_explaining'], par_res['efficacy_delta'])

#%%
# Mean and standard deviation of every numeric column per level of
# action_explaining. numeric_only=True is required on pandas >= 2.0, where
# aggregating non-numeric columns raises instead of silently dropping them.
groups_mean = results.groupby('action_explaining').mean(numeric_only=True)
groups_std = results.groupby('action_explaining').std(numeric_only=True)
# Mean change in efficacy per level, with +/- one standard deviation
plt.errorbar(groups_mean.index,groups_mean['efficacy_delta'],yerr=groups_std['efficacy_delta'])
#%%  Does perceived efficacy pre-1 correlate with partners' challenging of lifestyle changes? (spearman's r) - Yes, negatively
# The more effective partners initially felt they are, the less they challenged lifestyle changes!
# Keep only rows where both variables are present
nona = results.dropna(subset=["challenge_lifestyle_talk_par", "efficacy_prior"])

r, p = spearmanr(nona['challenge_lifestyle_talk_par'], nona['efficacy_prior'])
print('r:', r, 'p:', p, 'n=', nona['tot_action'].shape[0])
#%% Find the ratio of activists' challenging lifestyle changes per partners' lifestyle talk (lif_rat)
#Make a new df that only includes cases where partners mentioned lifestyle changes at least once.
#Take an explicit copy: a column is added below, and assigning onto the result
#of query() triggers SettingWithCopyWarning / is unreliable under copy-on-write.
lif_df = results.query('lifestyle_talk_par > 0').copy()

#Make a score, lif_rat, of the ratio of challenging lifestyle changes per lifestyle talk.
#The divisor is never zero because of the query above.
lif_rat = [int(i) / int(j) for i,j in zip(lif_df['challenge_lifestyle_talk'], lif_df['lifestyle_talk_par'])]

#Append the ratio scores to the lifestyle dataframe
lif_df['lif_rat'] = lif_rat
#%% The ratio of challenging lifestyle changes is not correlated with collective action (spearman)
rho, p = spearmanr(lif_df['lif_rat'], lif_df['col_action'])
print('rho:', rho, 'p:', p)
# n is the number of rows in lif_df
print('n =', lif_df['tot_action'].shape[0])

#%% The ratio of challenging lifestyle changes is not correlated with total action (spearman)
rho, p = spearmanr(lif_df['lif_rat'], lif_df['tot_action'])
print('r:', rho, 'p:', p)
print('n =', lif_df['tot_action'].shape[0])

#%% Challenging lifestyle changes isn't correlated with lifestyle changes (spearman)
rho, p = spearmanr(lif_df['lif_rat'], lif_df['lifestyle'])
print('r:', rho, 'p:', p)
print('n =', lif_df['tot_action'].shape[0])

# Scatter of the ratio against lifestyle changes
plt.scatter(x=lif_df['lif_rat'], y=lif_df['lifestyle'])
plt.show()

#%% Let's try SEM!

# Handles on treatment-group (par_res) columns.
# NOTE(review): the model below is fitted on `results`, not on these Series,
# so they do not influence the fit.
efficacy_delta = par_res['efficacy_delta']
action_explaining = par_res['action_explaining']
col_action = par_res['col_action']
invitation_talk = par_res['invitation_talk']
action_talk = par_res['action_talk']

# Model description with two regressions: efficacy_delta on action_explaining,
# and col_action on efficacy_delta plus three talk variables.
desc = '''efficacy_delta ~ action_explaining
          col_action ~ efficacy_delta + action_talk + invitation_talk + action_explaining'''
model = Model(desc)
# Fit against the full `results` frame; the return value is kept in res_opt
# but not used in this cell.
res_opt = model.fit(results)
# Print the table returned by model.inspect()
estimates = model.inspect()
print(estimates)
# %%
